diff --git a/devel/site-list.py b/devel/site-list.py index e0fd21d53f..e594d04d0f 100644 --- a/devel/site-list.py +++ b/devel/site-list.py @@ -14,7 +14,7 @@ # Removes schema-specific keywords for proper processing social_networks = data.copy() -social_networks.pop('$schema', None) +social_networks.pop("$schema", None) # Sort the social networks in alphanumeric order social_networks = sorted(social_networks.items()) diff --git a/devel/summarize_site_validation.py b/devel/summarize_site_validation.py index 89d39750e1..4d91d59c25 100644 --- a/devel/summarize_site_validation.py +++ b/devel/summarize_site_validation.py @@ -6,10 +6,11 @@ import sys from pathlib import Path + def summarize_junit_xml(xml_path: Path) -> str: tree = ET.parse(xml_path) root = tree.getroot() - suite = root.find('testsuite') + suite = root.find("testsuite") pass_message: str = ":heavy_check_mark:   Pass" fail_message: str = ":x:   Fail" @@ -22,42 +23,53 @@ def summarize_junit_xml(xml_path: Path) -> str: summary_lines.append("| Target | F+ Check | F- Check |") summary_lines.append("|---|---|---|") - failures = int(suite.get('failures', 0)) + failures = int(suite.get("failures", 0)) errors_detected: bool = False results: dict[str, dict[str, str]] = {} - for testcase in suite.findall('testcase'): - test_name = testcase.get('name').split('[')[0] - site_name = testcase.get('name').split('[')[1].rstrip(']') - failure = testcase.find('failure') - error = testcase.find('error') + for testcase in suite.findall("testcase"): + test_name = testcase.get("name").split("[")[0] + site_name = testcase.get("name").split("[")[1].rstrip("]") + failure = testcase.find("failure") + error = testcase.find("error") if site_name not in results: results[site_name] = {} if test_name == "test_false_neg": - results[site_name]['F- Check'] = pass_message if failure is None and error is None else fail_message + results[site_name]["F- Check"] = ( + pass_message if failure is None and error is None else fail_message + ) elif 
test_name == "test_false_pos": - results[site_name]['F+ Check'] = pass_message if failure is None and error is None else fail_message + results[site_name]["F+ Check"] = ( + pass_message if failure is None and error is None else fail_message + ) if error is not None: errors_detected = True for result in results: - summary_lines.append(f"| {result} | {results[result].get('F+ Check', 'Error!')} | {results[result].get('F- Check', 'Error!')} |") + summary_lines.append( + f"| {result} | {results[result].get('F+ Check', 'Error!')} | {results[result].get('F- Check', 'Error!')} |" + ) if failures > 0: - summary_lines.append("\n___\n" + - "\nFailures were detected on at least one updated target. Commits containing accuracy failures" + - " will often not be merged (unless a rationale is provided, such as false negatives due to regional differences).") + summary_lines.append( + "\n___\n" + + "\nFailures were detected on at least one updated target. Commits containing accuracy failures" + + " will often not be merged (unless a rationale is provided, such as false negatives due to regional differences)." + ) if errors_detected: - summary_lines.append("\n___\n" + - "\n**Errors were detected during validation. Please review the workflow logs.**") + summary_lines.append( + "\n___\n" + + "\n**Errors were detected during validation. 
Please review the workflow logs.**" + ) return "\n".join(summary_lines) + if __name__ == "__main__": if len(sys.argv) != 2: print("Usage: summarize_site_validation.py ") diff --git a/pyproject.toml b/pyproject.toml index 2ebad06406..c379de94a3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -53,6 +53,7 @@ stem = "^1.8.0" pandas = "^2.2.1" openpyxl = "^3.0.10" tomli = "^2.2.1" +tqdm = "^4.66.0" [tool.poetry.group.dev.dependencies] jsonschema = "^4.0.0" diff --git a/sherlock_project/__init__.py b/sherlock_project/__init__.py index ad6c9e308f..89a7b28df8 100644 --- a/sherlock_project/__init__.py +++ b/sherlock_project/__init__.py @@ -1,4 +1,4 @@ -""" Sherlock Module +"""Sherlock Module This module contains the main logic to search for usernames at social networks. @@ -15,16 +15,21 @@ def get_version() -> str: try: return pkg_version("sherlock_project") except PackageNotFoundError: - pyproject_path: pathlib.Path = pathlib.Path(__file__).resolve().parent.parent / "pyproject.toml" + pyproject_path: pathlib.Path = ( + pathlib.Path(__file__).resolve().parent.parent / "pyproject.toml" + ) with pyproject_path.open("rb") as f: pyproject_data = tomli.load(f) return pyproject_data["tool"]["poetry"]["version"] + # This variable is only used to check for ImportErrors induced by users running as script rather than as module or package import_error_test_var = None -__shortname__ = "Sherlock" -__longname__ = "Sherlock: Find Usernames Across Social Networks" -__version__ = get_version() +__shortname__ = "Sherlock" +__longname__ = "Sherlock: Find Usernames Across Social Networks" +__version__ = get_version() -forge_api_latest_release = "https://api.github.com/repos/sherlock-project/sherlock/releases/latest" +forge_api_latest_release = ( + "https://api.github.com/repos/sherlock-project/sherlock/releases/latest" +) diff --git a/sherlock_project/__main__.py b/sherlock_project/__main__.py index a252de0fc1..427615a5dc 100644 --- a/sherlock_project/__main__.py +++ 
b/sherlock_project/__main__.py @@ -9,14 +9,16 @@ import sys - if __name__ == "__main__": # Check if the user is using the correct version of Python python_version = sys.version.split()[0] if sys.version_info < (3, 9): - print(f"Sherlock requires Python 3.9+\nYou are using Python {python_version}, which is not supported by Sherlock.") + print( + f"Sherlock requires Python 3.9+\nYou are using Python {python_version}, which is not supported by Sherlock." + ) sys.exit(1) from sherlock_project import sherlock + sherlock.main() diff --git a/sherlock_project/notify.py b/sherlock_project/notify.py index ab6f5a3866..bba9a12f1b 100644 --- a/sherlock_project/notify.py +++ b/sherlock_project/notify.py @@ -3,9 +3,11 @@ This module defines the objects for notifying the caller about the results of queries. """ + from sherlock_project.result import QueryStatus from colorama import Fore, Style import webbrowser +from tqdm import tqdm # Global variable to count the number of results. globvar = 0 @@ -37,7 +39,6 @@ def __init__(self, result=None): self.result = result - def start(self, message=None): """Notify Start. @@ -55,7 +56,6 @@ def start(self, message=None): Nothing. """ - def update(self, result): """Notify Update. @@ -73,7 +73,6 @@ def update(self, result): self.result = result - def finish(self, message=None): """Notify Finish. @@ -91,7 +90,6 @@ def finish(self, message=None): Nothing. """ - def __str__(self): """Convert Object To String. @@ -133,7 +131,6 @@ def __init__(self, result=None, verbose=False, print_all=False, browse=False): self.print_all = print_all self.browse = browse - def start(self, message): """Notify Start. 
@@ -150,14 +147,21 @@ def start(self, message): title = "Checking username" - print(Style.BRIGHT + Fore.GREEN + "[" + - Fore.YELLOW + "*" + - Fore.GREEN + f"] {title}" + - Fore.WHITE + f" {message}" + - Fore.GREEN + " on:") + tqdm.write( + Style.BRIGHT + + Fore.GREEN + + "[" + + Fore.YELLOW + + "*" + + Fore.GREEN + + f"] {title}" + + Fore.WHITE + + f" {message}" + + Fore.GREEN + + " on:" + ) # An empty line between first line and the result(more clear output) - print('\r') - + tqdm.write("\r") def countResults(self): """This function counts the number of results. Every time the function is called, @@ -189,58 +193,100 @@ def update(self, result): self.result = result response_time_text = "" - if self.result.query_time is not None and self.verbose is True: - response_time_text = f" [{round(self.result.query_time * 1000)}ms]" + if self.result.query_time is not None: + response_time_text = ( + Fore.YELLOW + f" [{round(self.result.query_time * 1000)}ms]" + ) # Output to the terminal is desired. 
if result.status == QueryStatus.CLAIMED: self.countResults() - print(Style.BRIGHT + Fore.WHITE + "[" + - Fore.GREEN + "+" + - Fore.WHITE + "]" + - response_time_text + - Fore.GREEN + - f" {self.result.site_name}: " + - Style.RESET_ALL + - f"{self.result.site_url_user}") + tqdm.write( + Style.BRIGHT + + Fore.WHITE + + "[" + + Fore.GREEN + + "+" + + Fore.WHITE + + "]" + + response_time_text + + Fore.GREEN + + f" {self.result.site_name}: " + + Style.RESET_ALL + + f"{self.result.site_url_user}" + ) if self.browse: webbrowser.open(self.result.site_url_user, 2) elif result.status == QueryStatus.AVAILABLE: if self.print_all: - print(Style.BRIGHT + Fore.WHITE + "[" + - Fore.RED + "-" + - Fore.WHITE + "]" + - response_time_text + - Fore.GREEN + f" {self.result.site_name}:" + - Fore.YELLOW + " Not Found!") + tqdm.write( + Style.BRIGHT + + Fore.WHITE + + "[" + + Fore.RED + + "-" + + Fore.WHITE + + "]" + + response_time_text + + Fore.GREEN + + f" {self.result.site_name}:" + + Fore.YELLOW + + " Not Found!" + ) elif result.status == QueryStatus.UNKNOWN: if self.print_all: - print(Style.BRIGHT + Fore.WHITE + "[" + - Fore.RED + "-" + - Fore.WHITE + "]" + - Fore.GREEN + f" {self.result.site_name}:" + - Fore.RED + f" {self.result.context}" + - Fore.YELLOW + " ") + tqdm.write( + Style.BRIGHT + + Fore.WHITE + + "[" + + Fore.RED + + "-" + + Fore.WHITE + + "]" + + Fore.GREEN + + f" {self.result.site_name}:" + + Fore.RED + + f" {self.result.context}" + + Fore.YELLOW + + " " + ) elif result.status == QueryStatus.ILLEGAL: if self.print_all: msg = "Illegal Username Format For This Site!" 
- print(Style.BRIGHT + Fore.WHITE + "[" + - Fore.RED + "-" + - Fore.WHITE + "]" + - Fore.GREEN + f" {self.result.site_name}:" + - Fore.YELLOW + f" {msg}") + tqdm.write( + Style.BRIGHT + + Fore.WHITE + + "[" + + Fore.RED + + "-" + + Fore.WHITE + + "]" + + Fore.GREEN + + f" {self.result.site_name}:" + + Fore.YELLOW + + f" {msg}" + ) elif result.status == QueryStatus.WAF: if self.print_all: - print(Style.BRIGHT + Fore.WHITE + "[" + - Fore.RED + "-" + - Fore.WHITE + "]" + - Fore.GREEN + f" {self.result.site_name}:" + - Fore.RED + " Blocked by bot detection" + - Fore.YELLOW + " (proxy may help)") + tqdm.write( + Style.BRIGHT + + Fore.WHITE + + "[" + + Fore.RED + + "-" + + Fore.WHITE + + "]" + + Fore.GREEN + + f" {self.result.site_name}:" + + Fore.RED + + " Blocked by bot detection" + + Fore.YELLOW + + " (proxy may help)" + ) else: # It should be impossible to ever get here... @@ -248,24 +294,37 @@ def update(self, result): f"Unknown Query Status '{result.status}' for site '{self.result.site_name}'" ) - - def finish(self, message="The processing has been finished."): + def finish(self, total_time=None): """Notify Finish. Will print the last line to the standard output. Keyword Arguments: self -- This object. - message -- The 2 last phrases. + total_time -- Total time taken for the search. Return Value: Nothing. """ NumberOfResults = self.countResults() - 1 - print(Style.BRIGHT + Fore.GREEN + "[" + - Fore.YELLOW + "*" + - Fore.GREEN + "] Search completed with" + - Fore.WHITE + f" {NumberOfResults} " + - Fore.GREEN + "results" + Style.RESET_ALL - ) + total_time_text = "" + if total_time is not None: + total_time_text = f" in {round(total_time, 2)} seconds" + + tqdm.write( + Style.BRIGHT + + Fore.GREEN + + "[" + + Fore.YELLOW + + "*" + + Fore.GREEN + + "] Search completed with" + + Fore.WHITE + + f" {NumberOfResults} " + + Fore.GREEN + + "results" + + Fore.YELLOW + + total_time_text + + Style.RESET_ALL + ) def __str__(self): """Convert Object To String. 
diff --git a/sherlock_project/resources/data.json b/sherlock_project/resources/data.json index 47c39438de..889b21892f 100644 --- a/sherlock_project/resources/data.json +++ b/sherlock_project/resources/data.json @@ -37,6 +37,13 @@ "urlMain": "https://apclips.com/", "username_claimed": "onlybbyraq" }, + "AWS Skills Profile": { + "errorMsg": "shareProfileAccepted\":false", + "errorType": "message", + "url": "https://skillsprofile.skillbuilder.aws/user/{}/", + "urlMain": "https://skillsprofile.skillbuilder.aws", + "username_claimed": "mayank04pant" + }, "About.me": { "errorType": "status_code", "url": "https://about.me/{}", @@ -108,6 +115,14 @@ "urlProbe": "https://graphql.anilist.co/", "username_claimed": "Josh" }, + "Aparat": { + "errorType": "status_code", + "request_method": "GET", + "url": "https://www.aparat.com/{}/", + "urlMain": "https://www.aparat.com/", + "urlProbe": "https://www.aparat.com/api/fa/v1/user/user/information/username/{}", + "username_claimed": "jadi" + }, "Apple Developer": { "errorType": "status_code", "url": "https://developer.apple.com/forums/profile/{}", @@ -121,14 +136,6 @@ "urlMain": "https://discussions.apple.com", "username_claimed": "jason" }, - "Aparat": { - "errorType": "status_code", - "request_method": "GET", - "url": "https://www.aparat.com/{}/", - "urlMain": "https://www.aparat.com/", - "urlProbe": "https://www.aparat.com/api/fa/v1/user/user/information/username/{}", - "username_claimed": "jadi" - }, "Archive of Our Own": { "errorType": "status_code", "regexCheck": "^[^.]*?$", @@ -177,19 +184,12 @@ "errorType": "status_code", "url": "https://atcoder.jp/users/{}", "urlMain": "https://atcoder.jp/", - "username_claimed": "ksun48" - }, - "Vjudge": { - "errorType": "status_code", - "url": "https://VJudge.net/user/{}", - "urlMain": "https://VJudge.net/", - "username_claimed": "tokitsukaze" + "username_claimed": "blue" }, - "Audiojungle": { + "Author.today": { "errorType": "status_code", - "regexCheck": "^[a-zA-Z0-9_]+$", - "url": 
"https://audiojungle.net/user/{}", - "urlMain": "https://audiojungle.net/", + "url": "https://author.today/u/{}", + "urlMain": "https://author.today/", "username_claimed": "blue" }, "Autofrage": { @@ -205,13 +205,6 @@ "urlMain": "https://www.avizo.cz/", "username_claimed": "blue" }, - "AWS Skills Profile": { - "errorType": "message", - "errorMsg": "shareProfileAccepted\":false", - "url": "https://skillsprofile.skillbuilder.aws/user/{}/", - "urlMain": "https://skillsprofile.skillbuilder.aws", - "username_claimed": "mayank04pant" - }, "BOOTH": { "errorType": "response_url", "errorUrl": "https://booth.pm/", @@ -220,6 +213,16 @@ "urlMain": "https://booth.pm/", "username_claimed": "blue" }, + "BabyRu": { + "errorMsg": [ + "\u0421\u0442\u0440\u0430\u043d\u0438\u0446\u0430, \u043a\u043e\u0442\u043e\u0440\u0443\u044e \u0432\u044b \u0438\u0441\u043a\u0430\u043b\u0438, \u043d\u0435 \u043d\u0430\u0439\u0434\u0435\u043d\u0430", + "\u0414\u043e\u0441\u0442\u0443\u043f \u0441 \u0432\u0430\u0448\u0435\u0433\u043e IP-\u0430\u0434\u0440\u0435\u0441\u0430 \u0432\u0440\u0435\u043c\u0435\u043d\u043d\u043e \u043e\u0433\u0440\u0430\u043d\u0438\u0447\u0435\u043d" + ], + "errorType": "message", + "url": "https://www.baby.ru/u/{}", + "urlMain": "https://www.baby.ru/", + "username_claimed": "example" + }, "Bandcamp": { "errorType": "status_code", "url": "https://www.bandcamp.com/{}", @@ -233,6 +236,12 @@ "urlMain": "https://www.bazar.cz/", "username_claimed": "pianina" }, + "Beehiiv": { + "errorType": "status_code", + "url": "https://{}.beehiiv.com", + "urlMain": "https://www.beehiiv.com/", + "username_claimed": "blue" + }, "Behance": { "errorType": "status_code", "url": "https://www.behance.net/{}", @@ -251,6 +260,12 @@ "urlMain": "https://www.biggerpockets.com/", "username_claimed": "blue" }, + "Bio.link": { + "errorType": "status_code", + "url": "https://bio.link/{}", + "urlMain": "https://bio.link/", + "username_claimed": "blue" + }, "BioHacking": { "errorType": "status_code", "url": 
"https://forum.dangerousthings.com/u/{}", @@ -294,10 +309,18 @@ "Bluesky": { "errorType": "status_code", "url": "https://bsky.app/profile/{}.bsky.social", - "urlProbe": "https://public.api.bsky.app/xrpc/app.bsky.actor.getProfile?actor={}.bsky.social", "urlMain": "https://bsky.app/", + "urlProbe": "https://public.api.bsky.app/xrpc/app.bsky.actor.getProfile?actor={}.bsky.social", "username_claimed": "mcuban" }, + "BoardGameGeek": { + "errorMsg": "\"isValid\":true", + "errorType": "message", + "url": "https://boardgamegeek.com/user/{}", + "urlMain": "https://boardgamegeek.com/", + "urlProbe": "https://api.geekdo.com/api/accounts/validate/username?username={}", + "username_claimed": "blue" + }, "BongaCams": { "errorType": "status_code", "isNSFW": true, @@ -311,14 +334,6 @@ "urlMain": "https://www.bookcrossing.com/", "username_claimed": "blue" }, - "BoardGameGeek": { - "errorMsg": "\"isValid\":true", - "errorType": "message", - "url": "https://boardgamegeek.com/user/{}", - "urlMain": "https://boardgamegeek.com/", - "urlProbe": "https://api.geekdo.com/api/accounts/validate/username?username={}", - "username_claimed": "blue" - }, "BraveCommunity": { "errorType": "status_code", "url": "https://community.brave.com/u/{}/", @@ -352,12 +367,6 @@ "urlMain": "https://buzzfeed.com/", "username_claimed": "blue" }, - "Cfx.re Forum": { - "errorType": "status_code", - "url": "https://forum.cfx.re/u/{}/summary", - "urlMain": "https://forum.cfx.re", - "username_claimed": "hightowerlssd" - }, "CGTrader": { "errorType": "status_code", "regexCheck": "^[^.]*?$", @@ -411,12 +420,24 @@ "urlMain": "https://career.habr.com/", "username_claimed": "blue" }, + "Carrd": { + "errorType": "status_code", + "url": "https://{}.carrd.co", + "urlMain": "https://carrd.co/", + "username_claimed": "about" + }, "CashApp": { "errorType": "status_code", "url": "https://cash.app/${}", "urlMain": "https://cash.app", "username_claimed": "hotdiggitydog" }, + "Cfx.re Forum": { + "errorType": "status_code", + "url": 
"https://forum.cfx.re/u/{}/summary", + "urlMain": "https://forum.cfx.re", + "username_claimed": "hightowerlssd" + }, "Championat": { "errorType": "status_code", "url": "https://www.championat.com/user/{}", @@ -459,16 +480,17 @@ "urlMain": "https://choice.community/", "username_claimed": "gordon" }, - "Clapper": { + "Chollometro": { "errorType": "status_code", - "url": "https://clapperapp.com/{}", - "urlMain": "https://clapperapp.com/", + "request_method": "GET", + "url": "https://www.chollometro.com/profile/{}", + "urlMain": "https://www.chollometro.com/", "username_claimed": "blue" }, - "CloudflareCommunity": { + "Clapper": { "errorType": "status_code", - "url": "https://community.cloudflare.com/u/{}", - "urlMain": "https://community.cloudflare.com/", + "url": "https://clapperapp.com/{}", + "urlMain": "https://clapperapp.com/", "username_claimed": "blue" }, "Clozemaster": { @@ -491,6 +513,15 @@ "urlMain": "https://codesnippets.fandom.com", "username_claimed": "bob" }, + "CodeSandbox": { + "errorMsg": "Could not find user with username", + "errorType": "message", + "regexCheck": "^[a-zA-Z0-9_-]{3,30}$", + "url": "https://codesandbox.io/u/{}", + "urlMain": "https://codesandbox.io", + "urlProbe": "https://codesandbox.io/api/v1/users/{}", + "username_claimed": "icyjoseph" + }, "Codeberg": { "errorType": "status_code", "url": "https://codeberg.org/{}", @@ -538,15 +569,6 @@ "urlMain": "https://coderwall.com", "username_claimed": "hacker" }, - "CodeSandbox": { - "errorType": "message", - "errorMsg": "Could not find user with username", - "regexCheck": "^[a-zA-Z0-9_-]{3,30}$", - "url": "https://codesandbox.io/u/{}", - "urlProbe": "https://codesandbox.io/api/v1/users/{}", - "urlMain": "https://codesandbox.io", - "username_claimed": "icyjoseph" - }, "Codewars": { "errorType": "status_code", "url": "https://www.codewars.com/users/{}", @@ -554,12 +576,12 @@ "username_claimed": "example" }, "Codolio": { - "errorType": "message", "errorMsg": "Page Not Found | Codolio", + 
"errorType": "message", + "regexCheck": "^[a-zA-Z0-9_-]{3,30}$", "url": "https://codolio.com/profile/{}", "urlMain": "https://codolio.com/", - "username_claimed": "testuser", - "regexCheck": "^[a-zA-Z0-9_-]{3,30}$" + "username_claimed": "testuser" }, "Coinvote": { "errorType": "status_code", @@ -581,6 +603,12 @@ "urlMain": "https://contently.com/", "username_claimed": "jordanteicher" }, + "Contra": { + "errorType": "status_code", + "url": "https://contra.com/@{}", + "urlMain": "https://contra.com/", + "username_claimed": "gajus" + }, "Coroflot": { "errorType": "status_code", "url": "https://www.coroflot.com/{}", @@ -588,8 +616,8 @@ "username_claimed": "blue" }, "Cplusplus": { - "errorType": "message", "errorMsg": "404 Page Not Found", + "errorType": "message", "url": "https://cplusplus.com/user/{}", "urlMain": "https://cplusplus.com", "username_claimed": "mbozzi" @@ -648,6 +676,13 @@ "urlMain": "https://cults3d.com/en", "username_claimed": "brown" }, + "CurseForge": { + "errorCode": 404, + "errorType": "status_code", + "url": "https://www.curseforge.com/members/{}/projects", + "urlMain": "https://www.curseforge.com.", + "username_claimed": "blue" + }, "CyberDefenders": { "errorType": "status_code", "regexCheck": "^[^\\/:*?\"<>|@]{3,50}$", @@ -676,12 +711,6 @@ "urlMain": "https://www.dailymotion.com/", "username_claimed": "blue" }, - "dcinside": { - "errorType": "status_code", - "url": "https://gallog.dcinside.com/{}", - "urlMain": "https://www.dcinside.com/", - "username_claimed": "anrbrb" - }, "Dealabs": { "errorMsg": "La page que vous essayez", "errorType": "message", @@ -691,8 +720,8 @@ "username_claimed": "blue" }, "DeviantArt": { - "errorType": "message", "errorMsg": "Llama Not Found", + "errorType": "message", "regexCheck": "^[a-zA-Z][a-zA-Z0-9_-]*$", "url": "https://www.deviantart.com/{}", "urlMain": "https://www.deviantart.com/", @@ -701,10 +730,10 @@ "DigitalSpy": { "errorMsg": "The page you were looking for could not be found.", "errorType": "message", + 
"regexCheck": "^\\w{3,20}$", "url": "https://forums.digitalspy.com/profile/{}", "urlMain": "https://forums.digitalspy.com/", - "username_claimed": "blue", - "regexCheck": "^\\w{3,20}$" + "username_claimed": "blue" }, "Discogs": { "errorType": "status_code", @@ -713,23 +742,26 @@ "username_claimed": "blue" }, "Discord": { + "errorMsg": [ + "{\"taken\":false}", + "The resource is being rate limited" + ], "errorType": "message", - "url": "https://discord.com", - "urlMain": "https://discord.com/", - "urlProbe": "https://discord.com/api/v9/unique-username/username-attempt-unauthed", - "errorMsg": ["{\"taken\":false}", "The resource is being rate limited"], + "headers": { + "Content-Type": "application/json" + }, "request_method": "POST", "request_payload": { "username": "{}" }, - "headers": { - "Content-Type": "application/json" - }, + "url": "https://discord.com", + "urlMain": "https://discord.com/", + "urlProbe": "https://discord.com/api/v9/unique-username/username-attempt-unauthed", "username_claimed": "blue" }, "Discord.bio": { - "errorType": "message", "errorMsg": "Server Error (500)", + "errorType": "message", "url": "https://discords.com/api-v2/bio/details/{}", "urlMain": "https://discord.bio/", "username_claimed": "robert" @@ -744,9 +776,9 @@ "errorMsg": "{\"result\":\"ok\"}", "errorType": "message", "regexCheck": "^[a-zA-Z0-9_.-]{3,40}$", - "urlProbe": "https://www.diskusjon.no/?app=core&module=system&controller=ajax&do=usernameExists&input={}", "url": "https://www.diskusjon.no", "urlMain": "https://www.diskusjon.no", + "urlProbe": "https://www.diskusjon.no/?app=core&module=system&controller=ajax&do=usernameExists&input={}", "username_claimed": "blue" }, "Disqus": { @@ -805,6 +837,12 @@ "urlMain": "https://www.erome.com/", "username_claimed": "bob" }, + "Exercism": { + "errorType": "status_code", + "url": "https://exercism.org/profiles/{}", + "urlMain": "https://exercism.org/", + "username_claimed": "iHiD" + }, "Exposure": { "errorType": "status_code", 
"regexCheck": "^[a-zA-Z0-9-]{1,63}$", @@ -812,12 +850,6 @@ "urlMain": "https://exposure.co/", "username_claimed": "jonasjacobsson" }, - "exophase": { - "errorType": "status_code", - "url": "https://www.exophase.com/user/{}/", - "urlMain": "https://www.exophase.com/", - "username_claimed": "blue" - }, "EyeEm": { "errorType": "status_code", "url": "https://www.eyeem.com/u/{}", @@ -939,18 +971,18 @@ "urlMain": "https://www.gaiaonline.com/", "username_claimed": "adam" }, - "Gamespot": { - "errorType": "status_code", - "url": "https://www.gamespot.com/profile/{}/", - "urlMain": "https://www.gamespot.com/", - "username_claimed": "blue" - }, "GameFAQs": { "errorType": "status_code", "url": "https://gamefaqs.gamespot.com/community/{}", "urlMain": "https://gamefaqs.gamespot.com", "username_claimed": "blue" }, + "Gamespot": { + "errorType": "status_code", + "url": "https://www.gamespot.com/profile/{}/", + "urlMain": "https://www.gamespot.com/", + "username_claimed": "blue" + }, "GeeksforGeeks": { "errorType": "status_code", "url": "https://auth.geeksforgeeks.org/user/{}", @@ -990,8 +1022,8 @@ "username_claimed": "bob" }, "Giphy": { - "errorType": "message", "errorMsg": " GIFs - Find & Share on GIPHY", + "errorType": "message", "url": "https://giphy.com/{}", "urlMain": "https://giphy.com/", "username_claimed": "red" @@ -1010,14 +1042,6 @@ "urlMain": "https://www.github.com/", "username_claimed": "blue" }, - "Warframe Market": { - "errorType": "status_code", - "request_method": "GET", - "url": "https://warframe.market/profile/{}", - "urlMain": "https://warframe.market/", - "urlProbe": "https://api.warframe.market/v2/user/{}", - "username_claimed": "kaiallalone" - }, "GitLab": { "errorMsg": "[]", "errorType": "message", @@ -1086,6 +1110,12 @@ "urlMain": "https://www.gutefrage.net/", "username_claimed": "gutefrage" }, + "HackMD": { + "errorType": "status_code", + "url": "https://hackmd.io/@{}", + "urlMain": "https://hackmd.io/", + "username_claimed": "blue" + }, "HackTheBox": { 
"errorType": "status_code", "url": "https://forum.hackthebox.com/u/{}", @@ -1114,7 +1144,10 @@ }, "HackerNews": { "__comment__": "First errMsg invalid, second errMsg rate limited. Not ideal. Adjust for better rate limit filtering.", - "errorMsg": ["No such user.", "Sorry."], + "errorMsg": [ + "No such user.", + "Sorry." + ], "errorType": "message", "url": "https://news.ycombinator.com/user?id={}", "urlMain": "https://news.ycombinator.com/", @@ -1141,12 +1174,6 @@ "urlMain": "https://forum.hackersploit.org/", "username_claimed": "hackersploit" }, - "HackMD": { - "errorType": "status_code", - "url": "https://hackmd.io/@{}", - "urlMain": "https://hackmd.io/", - "username_claimed": "blue" - }, "Harvard Scholar": { "errorType": "status_code", "url": "https://scholar.harvard.edu/{}", @@ -1186,6 +1213,13 @@ "urlProbe": "https://www.holopin.io/api/auth/username", "username_claimed": "red" }, + "HotUKdeals": { + "errorType": "status_code", + "request_method": "GET", + "url": "https://www.hotukdeals.com/profile/{}", + "urlMain": "https://www.hotukdeals.com/", + "username_claimed": "Blue" + }, "Houzz": { "errorType": "status_code", "url": "https://houzz.com/user/{}", @@ -1198,13 +1232,6 @@ "urlMain": "https://hubpages.com/", "username_claimed": "blue" }, - "Hubski": { - "errorMsg": "No such user", - "errorType": "message", - "url": "https://hubski.com/user/{}", - "urlMain": "https://hubski.com/", - "username_claimed": "blue" - }, "HudsonRock": { "errorMsg": "This username is not associated", "errorType": "message", @@ -1218,6 +1245,12 @@ "urlMain": "https://huggingface.co/", "username_claimed": "Pasanlaksitha" }, + "HuggingFace": { + "errorType": "status_code", + "url": "https://huggingface.co/{}", + "urlMain": "https://huggingface.co/", + "username_claimed": "blue" + }, "IFTTT": { "errorType": "status_code", "regexCheck": "^[A-Za-z0-9]{3,35}$", @@ -1225,12 +1258,6 @@ "urlMain": "https://www.ifttt.com/", "username_claimed": "blue" }, - "Ifunny": { - "errorType": 
"status_code", - "url": "https://ifunny.co/user/{}", - "urlMain": "https://ifunny.co/", - "username_claimed": "agua" - }, "IRC-Galleria": { "errorType": "response_url", "errorUrl": "https://irc-galleria.net/users/search?username={}", @@ -1244,6 +1271,12 @@ "urlMain": "https://community.icons8.com/", "username_claimed": "thefourCraft" }, + "Ifunny": { + "errorType": "status_code", + "url": "https://ifunny.co/user/{}", + "urlMain": "https://ifunny.co/", + "username_claimed": "agua" + }, "Image Fap": { "errorMsg": "Not found", "errorType": "message", @@ -1265,12 +1298,6 @@ "urlProbe": "https://api.imgur.com/account/v1/accounts/{}?client_id=546c25a59c58ad7", "username_claimed": "blue" }, - "imood": { - "errorType": "status_code", - "url": "https://www.imood.com/users/{}", - "urlMain": "https://www.imood.com/", - "username_claimed": "blue" - }, "Instagram": { "errorType": "status_code", "url": "https://instagram.com/{}", @@ -1348,7 +1375,7 @@ "username_claimed": "laurent" }, "Jupyter Community Forum": { - "errorMsg": "Oops! That page doesn’t exist or is private.", + "errorMsg": "Oops! 
That page doesn\u2019t exist or is private.", "errorType": "message", "url": "https://discourse.jupyter.org/u/{}/summary", "urlMain": "https://discourse.jupyter.org", @@ -1360,14 +1387,6 @@ "urlMain": "https://www.kaggle.com/", "username_claimed": "dansbecker" }, - "kaskus": { - "errorType": "status_code", - "url": "https://www.kaskus.co.id/@{}", - "urlMain": "https://www.kaskus.co.id", - "urlProbe": "https://www.kaskus.co.id/api/users?username={}", - "request_method": "GET", - "username_claimed": "l0mbart" - }, "Keybase": { "errorType": "status_code", "url": "https://keybase.io/{}", @@ -1404,9 +1423,9 @@ "errorMsg": "{\"result\":\"ok\"}", "errorType": "message", "regexCheck": "^[a-zA-Z0-9_.-]{3,18}$", - "urlProbe": "https://forum.kvinneguiden.no/?app=core&module=system&controller=ajax&do=usernameExists&input={}", "url": "https://forum.kvinneguiden.no", "urlMain": "https://forum.kvinneguiden.no", + "urlProbe": "https://forum.kvinneguiden.no/?app=core&module=system&controller=ajax&do=usernameExists&input={}", "username_claimed": "blue" }, "LOR": { @@ -1417,9 +1436,9 @@ }, "Laracast": { "errorType": "status_code", + "regexCheck": "^[a-zA-Z0-9_-]{3,}$", "url": "https://laracasts.com/@{}", "urlMain": "https://laracasts.com/", - "regexCheck": "^[a-zA-Z0-9_-]{3,}$", "username_claimed": "user1" }, "Launchpad": { @@ -1428,6 +1447,12 @@ "urlMain": "https://launchpad.net/", "username_claimed": "blue" }, + "Layers.to": { + "errorType": "status_code", + "url": "https://layers.to/{}", + "urlMain": "https://layers.to/", + "username_claimed": "blue" + }, "LeetCode": { "errorType": "status_code", "url": "https://leetcode.com/{}", @@ -1435,17 +1460,17 @@ "username_claimed": "blue" }, "LemmyWorld": { - "errorType": "message", "errorMsg": "

<h1>Error!</h1>

", + "errorType": "message", "url": "https://lemmy.world/u/{}", "urlMain": "https://lemmy.world", "username_claimed": "blue" }, "LessWrong": { - "url": "https://www.lesswrong.com/users/{}", - "urlMain": "https://www.lesswrong.com/", "errorType": "response_url", "errorUrl": "https://www.lesswrong.com/", + "url": "https://www.lesswrong.com/users/{}", + "urlMain": "https://www.lesswrong.com/", "username_claimed": "habryka" }, "Letterboxd": { @@ -1471,12 +1496,12 @@ "urlMain": "https://lichess.org", "username_claimed": "john" }, - "LinkedIn": { + "LinkedIn": { "errorType": "status_code", "headers": { - "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36", + "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8", "Accept-Language": "en-US,en;q=0.9", - "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8" + "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36" }, "regexCheck": "^[a-zA-Z0-9]{3,100}$", "request_method": "GET", @@ -1597,10 +1622,10 @@ }, "MuseScore": { "errorType": "status_code", + "request_method": "GET", "url": "https://musescore.com/{}", "urlMain": "https://musescore.com/", - "username_claimed": "arrangeme", - "request_method": "GET" + "username_claimed": "arrangeme" }, "MyAnimeList": { "errorType": "status_code", @@ -1614,11 +1639,18 @@ "urlMain": "https://www.myminifactory.com/", "username_claimed": "blue" }, - "Mydramalist": { - "errorMsg": "The requested page was not found", - "errorType": "message", - "url": "https://www.mydramalist.com/profile/{}", - "urlMain": "https://mydramalist.com", + "Mydealz": { + "errorType": "status_code", + "request_method": "GET", + "url": "https://www.mydealz.de/profile/{}", + "urlMain": "https://www.mydealz.de/", + "username_claimed": "blue" + }, + "Mydramalist": { + "errorMsg": "The 
requested page was not found", + "errorType": "message", + "url": "https://www.mydramalist.com/profile/{}", + "urlMain": "https://mydramalist.com", "username_claimed": "elhadidy12398" }, "Myspace": { @@ -1634,13 +1666,6 @@ "urlMain": "https://www.native-instruments.com/forum/", "username_claimed": "jambert" }, - "namuwiki": { - "__comment__": "This is a Korean site and it's expected to return false negatives in certain other regions.", - "errorType": "status_code", - "url": "https://namu.wiki/w/%EC%82%AC%EC%9A%A9%EC%9E%90:{}", - "urlMain": "https://namu.wiki/", - "username_claimed": "namu" - }, "NationStates Nation": { "errorMsg": "Was this your nation? It may have ceased to exist due to inactivity, but can rise again!", "errorType": "message", @@ -1667,6 +1692,12 @@ "urlMain": "https://www.needrom.com/", "username_claimed": "needrom" }, + "Netlify": { + "errorType": "status_code", + "url": "https://{}.netlify.app", + "urlMain": "https://www.netlify.com/", + "username_claimed": "blue" + }, "Newgrounds": { "errorType": "status_code", "regexCheck": "^[a-zA-Z][a-zA-Z0-9_-]*$", @@ -1728,12 +1759,19 @@ "username_claimed": "blue" }, "ObservableHQ": { - "errorType": "message", "errorMsg": "Page not found", + "errorType": "message", "url": "https://observablehq.com/@{}", "urlMain": "https://observablehq.com/", "username_claimed": "mbostock" }, + "Odysee": { + "errorMsg": "", + "errorType": "message", + "url": "https://odysee.com/@{}", + "urlMain": "https://odysee.com/", + "username_claimed": "Odysee" + }, "Open Collective": { "errorType": "status_code", "url": "https://opencollective.com/{}", @@ -1753,13 +1791,6 @@ "urlMain": "https://www.openstreetmap.org/", "username_claimed": "blue" }, - "Odysee": { - "errorMsg": "", - "errorType": "message", - "url": "https://odysee.com/@{}", - "urlMain": "https://odysee.com/", - "username_claimed": "Odysee" - }, "Opensource": { "errorType": "status_code", "url": "https://opensource.com/users/{}", @@ -1828,68 +1859,33 @@ "urlMain": 
"https://pentesterlab.com/", "username_claimed": "0day" }, - "HotUKdeals": { - "errorType": "status_code", - "url": "https://www.hotukdeals.com/profile/{}", - "urlMain": "https://www.hotukdeals.com/", - "username_claimed": "Blue", - "request_method": "GET" - }, - "Mydealz": { - "errorType": "status_code", - "url": "https://www.mydealz.de/profile/{}", - "urlMain": "https://www.mydealz.de/", - "username_claimed": "blue", - "request_method": "GET" - }, - "Chollometro": { - "errorType": "status_code", - "url": "https://www.chollometro.com/profile/{}", - "urlMain": "https://www.chollometro.com/", - "username_claimed": "blue", - "request_method": "GET" - }, "PepperNL": { "errorType": "status_code", + "request_method": "GET", "url": "https://nl.pepper.com/profile/{}", "urlMain": "https://nl.pepper.com/", - "username_claimed": "Dynaw", - "request_method": "GET" + "username_claimed": "Dynaw" }, "PepperPL": { "errorType": "status_code", + "request_method": "GET", "url": "https://www.pepper.pl/profile/{}", "urlMain": "https://www.pepper.pl/", - "username_claimed": "FireChicken", - "request_method": "GET" - }, - "Preisjaeger": { - "errorType": "status_code", - "url": "https://www.preisjaeger.at/profile/{}", - "urlMain": "https://www.preisjaeger.at/", - "username_claimed": "Stefan", - "request_method": "GET" + "username_claimed": "FireChicken" }, "Pepperdeals": { "errorType": "status_code", + "request_method": "GET", "url": "https://www.pepperdeals.se/profile/{}", "urlMain": "https://www.pepperdeals.se/", - "username_claimed": "Mark", - "request_method": "GET" + "username_claimed": "Mark" }, "PepperealsUS": { "errorType": "status_code", + "request_method": "GET", "url": "https://www.pepperdeals.com/profile/{}", "urlMain": "https://www.pepperdeals.com/", - "username_claimed": "Stepan", - "request_method": "GET" - }, - "Promodescuentos": { - "errorType": "status_code", - "url": "https://www.promodescuentos.com/profile/{}", - "urlMain": "https://www.promodescuentos.com/", - 
"username_claimed": "blue", - "request_method": "GET" + "username_claimed": "Stepan" }, "Periscope": { "errorType": "status_code", @@ -1904,11 +1900,21 @@ "urlMain": "https://www.pinkbike.com/", "username_claimed": "blue" }, - "pixelfed.social": { + "Pinterest": { "errorType": "status_code", - "url": "https://pixelfed.social/{}/", - "urlMain": "https://pixelfed.social", - "username_claimed": "pylapp" + "errorUrl": "https://www.pinterest.com/", + "url": "https://www.pinterest.com/{}/", + "urlMain": "https://www.pinterest.com/", + "urlProbe": "https://www.pinterest.com/oembed.json?url=https://www.pinterest.com/{}/", + "username_claimed": "blue" + }, + "Platzi": { + "errorCode": 404, + "errorType": "status_code", + "request_method": "GET", + "url": "https://platzi.com/p/{}/", + "urlMain": "https://platzi.com/", + "username_claimed": "freddier" }, "PlayStore": { "errorType": "status_code", @@ -1970,32 +1976,32 @@ "urlMain": "https://pornhub.com/", "username_claimed": "blue" }, + "Preisjaeger": { + "errorType": "status_code", + "request_method": "GET", + "url": "https://www.preisjaeger.at/profile/{}", + "urlMain": "https://www.preisjaeger.at/", + "username_claimed": "Stefan" + }, "ProductHunt": { "errorType": "status_code", "url": "https://www.producthunt.com/@{}", "urlMain": "https://www.producthunt.com/", "username_claimed": "jenny" }, - "programming.dev": { - "errorMsg": "Error!", - "errorType": "message", - "url": "https://programming.dev/u/{}", - "urlMain": "https://programming.dev", - "username_claimed": "pylapp" - }, - "Pychess": { - "errorType": "message", - "errorMsg": "404", - "url": "https://www.pychess.org/@/{}", - "urlMain": "https://www.pychess.org", - "username_claimed": "gbtami" - }, "PromoDJ": { "errorType": "status_code", "url": "http://promodj.com/{}", "urlMain": "http://promodj.com/", "username_claimed": "blue" }, + "Promodescuentos": { + "errorType": "status_code", + "request_method": "GET", + "url": "https://www.promodescuentos.com/profile/{}", + 
"urlMain": "https://www.promodescuentos.com/", + "username_claimed": "blue" + }, "Pronouns.page": { "errorType": "status_code", "url": "https://pronouns.page/@{}", @@ -2005,12 +2011,19 @@ "PyPi": { "errorType": "status_code", "url": "https://pypi.org/user/{}", - "urlProbe": "https://pypi.org/_includes/administer-user-include/{}", "urlMain": "https://pypi.org", + "urlProbe": "https://pypi.org/_includes/administer-user-include/{}", "username_claimed": "Blue" }, + "Pychess": { + "errorMsg": "404", + "errorType": "message", + "url": "https://www.pychess.org/@/{}", + "urlMain": "https://www.pychess.org", + "username_claimed": "gbtami" + }, "Python.org Discussions": { - "errorMsg": "Oops! That page doesn’t exist or is private.", + "errorMsg": "Oops! That page doesn\u2019t exist or is private.", "errorType": "message", "url": "https://discuss.python.org/u/{}/summary", "urlMain": "https://discuss.python.org", @@ -2035,11 +2048,12 @@ "urlMain": "https://rateyourmusic.com/", "username_claimed": "blue" }, - "Rclone Forum": { - "errorType": "status_code", - "url": "https://forum.rclone.org/u/{}", - "urlMain": "https://forum.rclone.org/", - "username_claimed": "ncw" + "Realmeye": { + "errorMsg": "Sorry, but we either:", + "errorType": "message", + "url": "https://www.realmeye.com/player/{}", + "urlMain": "https://www.realmeye.com/", + "username_claimed": "rotmg" }, "RedTube": { "errorType": "status_code", @@ -2064,13 +2078,6 @@ "urlMain": "https://www.reddit.com/", "username_claimed": "blue" }, - "Realmeye": { - "errorMsg": "Sorry, but we either:", - "errorType": "message", - "url": "https://www.realmeye.com/player/{}", - "urlMain": "https://www.realmeye.com/", - "username_claimed": "rotmg" - }, "Reisefrage": { "errorType": "status_code", "url": "https://www.reisefrage.net/nutzer/{}", @@ -2119,7 +2126,7 @@ "username_claimed": "asuna-black" }, "Ruby Forums": { - "errorMsg": "Oops! That page doesn’t exist or is private.", + "errorMsg": "Oops! 
That page doesn\u2019t exist or is private.", "errorType": "message", "url": "https://ruby-forum.com/u/{}/summary", "urlMain": "https://ruby-forums.com", @@ -2147,6 +2154,19 @@ "urlProbe": "https://apps.runescape.com/runemetrics/profile/profile?user={}", "username_claimed": "L33" }, + "SEOForum": { + "errorType": "status_code", + "url": "https://seoforum.com/@{}", + "urlMain": "https://www.seoforum.com/", + "username_claimed": "ko" + }, + "SOOP": { + "errorType": "status_code", + "url": "https://www.sooplive.co.kr/station/{}", + "urlMain": "https://www.sooplive.co.kr/", + "urlProbe": "https://api-channel.sooplive.co.kr/v1.1/channel/{}/station", + "username_claimed": "udkn" + }, "SWAPD": { "errorType": "status_code", "url": "https://swapd.co/u/{}", @@ -2172,12 +2192,6 @@ "urlMain": "https://www.scribd.com/", "username_claimed": "blue" }, - "SEOForum": { - "errorType": "status_code", - "url": "https://seoforum.com/@{}", - "urlMain": "https://www.seoforum.com/", - "username_claimed": "ko" - }, "Shelf": { "errorType": "status_code", "url": "https://www.shelf.im/{}", @@ -2225,8 +2239,8 @@ "username_claimed": "blue" }, "SlideShare": { - "errorType": "message", "errorMsg": "Page no longer exists", + "errorType": "message", "url": "https://slideshare.net/{}", "urlMain": "https://slideshare.net/", "username_claimed": "blue" @@ -2260,13 +2274,6 @@ "urlMain": "https://www.snapchat.com", "username_claimed": "teamsnapchat" }, - "SOOP": { - "errorType": "status_code", - "url": "https://www.sooplive.co.kr/station/{}", - "urlMain": "https://www.sooplive.co.kr/", - "urlProbe": "https://api-channel.sooplive.co.kr/v1.1/channel/{}/station", - "username_claimed": "udkn" - }, "SoundCloud": { "errorType": "status_code", "url": "https://soundcloud.com/{}", @@ -2341,6 +2348,13 @@ "urlMain": "https://open.spotify.com/", "username_claimed": "blue" }, + "Spoutible": { + "errorMsg": "Page not found!", + "errorType": "message", + "url": "https://spoutible.com/{}", + "urlMain": 
"https://spoutible.com/", + "username_claimed": "cbouzy" + }, "Star Citizen": { "errorMsg": "404", "errorType": "message", @@ -2390,42 +2404,6 @@ "urlProbe": "https://ch.tetr.io/api/users/{}", "username_claimed": "osk" }, - "TheMovieDB": { - "errorType": "status_code", - "url": "https://www.themoviedb.org/u/{}", - "urlMain": "https://www.themoviedb.org/", - "username_claimed": "blue" - }, - "TikTok": { - "url": "https://www.tiktok.com/@{}", - "urlMain": "https://www.tiktok.com", - "errorType": "message", - "errorMsg": [ - "\"statusCode\":10221", - "Govt. of India decided to block 59 apps" - ], - "username_claimed": "charlidamelio" - }, - "Tiendanube": { - "url": "https://{}.mitiendanube.com/", - "urlMain": "https://www.tiendanube.com/", - "errorType": "status_code", - "username_claimed": "blue" - }, - "Topcoder": { - "errorType": "status_code", - "url": "https://profiles.topcoder.com/{}/", - "urlMain": "https://topcoder.com/", - "username_claimed": "USER", - "urlProbe": "https://api.topcoder.com/v5/members/{}", - "regexCheck": "^[a-zA-Z0-9_.]+$" - }, - "Topmate": { - "errorType": "status_code", - "url": "https://topmate.io/{}", - "urlMain": "https://topmate.io/", - "username_claimed": "blue" - }, "TRAKTRAIN": { "errorType": "status_code", "url": "https://traktrain.com/{}", @@ -2463,17 +2441,33 @@ "urlMain": "https://forums.terraria.org/index.php", "username_claimed": "blue" }, + "TheMovieDB": { + "errorType": "status_code", + "url": "https://www.themoviedb.org/u/{}", + "urlMain": "https://www.themoviedb.org/", + "username_claimed": "blue" + }, "ThemeForest": { "errorType": "status_code", "url": "https://themeforest.net/user/{}", "urlMain": "https://themeforest.net/", "username_claimed": "user" }, - "tistory": { + "Tiendanube": { "errorType": "status_code", - "url": "https://{}.tistory.com/", - "urlMain": "https://www.tistory.com/", - "username_claimed": "notice" + "url": "https://{}.mitiendanube.com/", + "urlMain": "https://www.tiendanube.com/", + 
"username_claimed": "blue" + }, + "TikTok": { + "errorMsg": [ + "\"statusCode\":10221", + "Govt. of India decided to block 59 apps" + ], + "errorType": "message", + "url": "https://www.tiktok.com/@{}", + "urlMain": "https://www.tiktok.com", + "username_claimed": "charlidamelio" }, "TnAFlix": { "errorType": "status_code", @@ -2482,6 +2476,20 @@ "urlMain": "https://www.tnaflix.com/", "username_claimed": "hacker" }, + "Topcoder": { + "errorType": "status_code", + "regexCheck": "^[a-zA-Z0-9_.]+$", + "url": "https://profiles.topcoder.com/{}/", + "urlMain": "https://topcoder.com/", + "urlProbe": "https://api.topcoder.com/v5/members/{}", + "username_claimed": "USER" + }, + "Topmate": { + "errorType": "status_code", + "url": "https://topmate.io/{}", + "urlMain": "https://topmate.io/", + "username_claimed": "blue" + }, "TradingView": { "errorType": "status_code", "request_method": "GET", @@ -2517,6 +2525,13 @@ "urlProbe": "https://trello.com/1/Members/{}", "username_claimed": "blue" }, + "Trovo": { + "errorMsg": "Uh Ohhh...", + "errorType": "message", + "url": "https://trovo.live/s/{}/", + "urlMain": "https://trovo.live", + "username_claimed": "Aimilios" + }, "TryHackMe": { "errorMsg": "{\"success\":false}", "errorType": "message", @@ -2546,14 +2561,6 @@ "urlMain": "https://www.twitch.tv", "username_claimed": "xqc" }, - - "Trovo": { - "errorMsg": "Uh Ohhh...", - "errorType": "message", - "url": "https://trovo.live/s/{}/", - "urlMain": "https://trovo.live", - "username_claimed": "Aimilios" - }, "Twitter": { "errorMsg": [ "
User ", @@ -2592,13 +2599,6 @@ "urlMain": "https://untappd.com/", "username_claimed": "untappd" }, - "Valorant Forums": { - "errorMsg": "The page you requested could not be found.", - "errorType": "message", - "url": "https://valorantforums.com/u/{}", - "urlMain": "https://valorantforums.com", - "username_claimed": "Wolves" - }, "VK": { "errorType": "response_url", "errorUrl": "https://www.quora.com/profile/{}", @@ -2606,12 +2606,25 @@ "urlMain": "https://vk.com/", "username_claimed": "brown" }, + "VLR": { + "errorType": "status_code", + "url": "https://www.vlr.gg/user/{}", + "urlMain": "https://www.vlr.gg", + "username_claimed": "optms" + }, "VSCO": { "errorType": "status_code", "url": "https://vsco.co/{}", "urlMain": "https://vsco.co/", "username_claimed": "blue" }, + "Valorant Forums": { + "errorMsg": "The page you requested could not be found.", + "errorType": "message", + "url": "https://valorantforums.com/u/{}", + "urlMain": "https://valorantforums.com", + "username_claimed": "Wolves" + }, "Velog": { "errorType": "status_code", "url": "https://velog.io/@{}/posts", @@ -2626,7 +2639,9 @@ "username_claimed": "red" }, "Venmo": { - "errorMsg": ["Venmo | Page Not Found"], + "errorMsg": [ + "Venmo | Page Not Found" + ], "errorType": "message", "headers": { "Host": "account.venmo.com" @@ -2636,12 +2651,10 @@ "urlProbe": "https://test1.venmo.com/u/{}", "username_claimed": "jenny" }, - "Vero": { - "errorMsg": "Not Found", - "errorType": "message", - "request_method": "GET", - "url": "https://vero.co/{}", - "urlMain": "https://vero.co/", + "Vercel": { + "errorType": "status_code", + "url": "https://{}.vercel.app", + "urlMain": "https://vercel.com/", "username_claimed": "blue" }, "Vimeo": { @@ -2658,12 +2671,6 @@ "urlProbe": "https://www.virustotal.com/ui/users/{}/avatar", "username_claimed": "blue" }, - "VLR": { - "errorType": "status_code", - "url": "https://www.vlr.gg/user/{}", - "urlMain": "https://www.vlr.gg", - "username_claimed": "optms" - }, "WICG Forum": { 
"errorType": "status_code", "regexCheck": "^(?![.-])[a-zA-Z0-9_.-]{3,20}$", @@ -2677,6 +2684,14 @@ "urlMain": "https://wakatime.com/", "username_claimed": "blue" }, + "Warframe Market": { + "errorType": "status_code", + "request_method": "GET", + "url": "https://warframe.market/profile/{}", + "urlMain": "https://warframe.market/", + "urlProbe": "https://api.warframe.market/v2/user/{}", + "username_claimed": "kaiallalone" + }, "Warrior Forum": { "errorType": "status_code", "url": "https://www.warriorforum.com/members/{}.html", @@ -2767,6 +2782,13 @@ "urlMain": "https://www.wordnik.com/", "username_claimed": "blue" }, + "Wowhead": { + "errorCode": 404, + "errorType": "status_code", + "url": "https://wowhead.com/user={}", + "urlMain": "https://wowhead.com/", + "username_claimed": "blue" + }, "Wykop": { "errorType": "status_code", "url": "https://www.wykop.pl/ludzie/{}", @@ -2825,6 +2847,13 @@ "urlMain": "https://www.youtube.com/", "username_claimed": "youtube" }, + "addons.wago.io": { + "errorCode": 404, + "errorType": "status_code", + "url": "https://addons.wago.io/user/{}", + "urlMain": "https://addons.wago.io/", + "username_claimed": "blue" + }, "akniga": { "errorType": "status_code", "url": "https://akniga.org/profile/{}", @@ -2876,6 +2905,12 @@ "urlMain": "http://dating.ru", "username_claimed": "blue" }, + "dcinside": { + "errorType": "status_code", + "url": "https://gallog.dcinside.com/{}", + "urlMain": "https://www.dcinside.com/", + "username_claimed": "anrbrb" + }, "devRant": { "errorType": "response_url", "errorUrl": "https://devrant.com/", @@ -2902,6 +2937,12 @@ "urlMain": "https://eintracht.de", "username_claimed": "blue" }, + "exophase": { + "errorType": "status_code", + "url": "https://www.exophase.com/user/{}/", + "urlMain": "https://www.exophase.com/", + "username_claimed": "blue" + }, "fixya": { "errorType": "status_code", "url": "https://www.fixya.com/users/{}", @@ -2967,6 +3008,12 @@ "urlMain": "http://forum.igromania.ru/", "username_claimed": "blue" 
}, + "imood": { + "errorType": "status_code", + "url": "https://www.imood.com/users/{}", + "urlMain": "https://www.imood.com/", + "username_claimed": "blue" + }, "interpals": { "errorMsg": "The requested user does not exist or is inactive", "errorType": "message", @@ -2994,6 +3041,14 @@ "urlProbe": "https://www.jeuxvideo.com/profil/{}?mode=infos", "username_claimed": "adam" }, + "kaskus": { + "errorType": "status_code", + "request_method": "GET", + "url": "https://www.kaskus.co.id/@{}", + "urlMain": "https://www.kaskus.co.id", + "urlProbe": "https://www.kaskus.co.id/api/users?username={}", + "username_claimed": "l0mbart" + }, "kofi": { "errorType": "response_url", "errorUrl": "https://ko-fi.com/art?=redirect", @@ -3043,12 +3098,6 @@ "urlMain": "https://mastodon.xyz/", "username_claimed": "TheKinrar" }, - "mstdn.social": { - "errorType": "status_code", - "url": "https://mstdn.social/@{}", - "urlMain": "https://mstdn.social/", - "username_claimed": "MagicLike" - }, "mercadolivre": { "errorType": "status_code", "url": "https://www.mercadolivre.com.br/perfil/{}", @@ -3075,11 +3124,11 @@ "urlMain": "https://mstdn.io/", "username_claimed": "blue" }, - "nairaland.com": { + "mstdn.social": { "errorType": "status_code", - "url": "https://www.nairaland.com/{}", - "urlMain": "https://www.nairaland.com/", - "username_claimed": "red" + "url": "https://mstdn.social/@{}", + "urlMain": "https://mstdn.social/", + "username_claimed": "MagicLike" }, "n8n Community": { "errorType": "status_code", @@ -3087,6 +3136,19 @@ "urlMain": "https://community.n8n.io/", "username_claimed": "n8n" }, + "nairaland.com": { + "errorType": "status_code", + "url": "https://www.nairaland.com/{}", + "urlMain": "https://www.nairaland.com/", + "username_claimed": "red" + }, + "namuwiki": { + "__comment__": "This is a Korean site and it's expected to return false negatives in certain other regions.", + "errorType": "status_code", + "url": "https://namu.wiki/w/%EC%82%AC%EC%9A%A9%EC%9E%90:{}", + "urlMain": 
"https://namu.wiki/", + "username_claimed": "namu" + }, "nnRU": { "errorType": "status_code", "regexCheck": "^[\\w@-]+?$", @@ -3141,13 +3203,11 @@ "urlMain": "https://pikabu.ru/", "username_claimed": "blue" }, - "Pinterest": { + "pixelfed.social": { "errorType": "status_code", - "errorUrl": "https://www.pinterest.com/", - "url": "https://www.pinterest.com/{}/", - "urlProbe": "https://www.pinterest.com/oembed.json?url=https://www.pinterest.com/{}/", - "urlMain": "https://www.pinterest.com/", - "username_claimed": "blue" + "url": "https://pixelfed.social/{}/", + "urlMain": "https://pixelfed.social", + "username_claimed": "pylapp" }, "pr0gramm": { "errorType": "status_code", @@ -3163,6 +3223,13 @@ "urlMain": "https://prog.hu/", "username_claimed": "Sting" }, + "programming.dev": { + "errorMsg": "Error!", + "errorType": "message", + "url": "https://programming.dev/u/{}", + "urlMain": "https://programming.dev", + "username_claimed": "pylapp" + }, "satsisRU": { "errorType": "status_code", "url": "https://satsis.info/user/{}", @@ -3194,7 +3261,7 @@ "username_claimed": "green" }, "threads": { - "errorMsg": "Threads • Log in", + "errorMsg": "Threads \u2022 Log in", "errorType": "message", "headers": { "Sec-Fetch-Mode": "navigate" @@ -3203,6 +3270,12 @@ "urlMain": "https://www.threads.net/", "username_claimed": "zuck" }, + "tistory": { + "errorType": "status_code", + "url": "https://{}.tistory.com/", + "urlMain": "https://www.tistory.com/", + "username_claimed": "notice" + }, "toster": { "errorType": "status_code", "url": "https://www.toster.ru/user/{}/answers", @@ -3240,44 +3313,5 @@ "url": "https://www.znanylekarz.pl/{}", "urlMain": "https://znanylekarz.pl", "username_claimed": "janusz-nowak" - }, - "Platzi": { - "errorType": "status_code", - "errorCode": 404, - "url": "https://platzi.com/p/{}/", - "urlMain": "https://platzi.com/", - "username_claimed": "freddier", - "request_method": "GET" - }, - "BabyRu": { - "url": "https://www.baby.ru/u/{}", - "urlMain": 
"https://www.baby.ru/", - "errorType": "message", - "errorMsg": [ - "\u0421\u0442\u0440\u0430\u043d\u0438\u0446\u0430, \u043a\u043e\u0442\u043e\u0440\u0443\u044e \u0432\u044b \u0438\u0441\u043a\u0430\u043b\u0438, \u043d\u0435 \u043d\u0430\u0439\u0434\u0435\u043d\u0430", - "\u0414\u043e\u0441\u0442\u0443\u043f \u0441 \u0432\u0430\u0448\u0435\u0433\u043e IP-\u0430\u0434\u0440\u0435\u0441\u0430 \u0432\u0440\u0435\u043c\u0435\u043d\u043d\u043e \u043e\u0433\u0440\u0430\u043d\u0438\u0447\u0435\u043d" - ], - "username_claimed": "example" - }, - "Wowhead": { - "url": "https://wowhead.com/user={}", - "urlMain": "https://wowhead.com/", - "errorType": "status_code", - "errorCode": 404, - "username_claimed": "blue" - }, - "addons.wago.io": { - "url": "https://addons.wago.io/user/{}", - "urlMain": "https://addons.wago.io/", - "errorType": "status_code", - "errorCode": 404, - "username_claimed": "blue" - }, - "CurseForge": { - "url": "https://www.curseforge.com/members/{}/projects", - "urlMain": "https://www.curseforge.com.", - "errorType": "status_code", - "errorCode": 404, - "username_claimed": "blue" } -} +} \ No newline at end of file diff --git a/sherlock_project/result.py b/sherlock_project/result.py index c4d68b1c88..4e5b6c92f8 100644 --- a/sherlock_project/result.py +++ b/sherlock_project/result.py @@ -2,6 +2,7 @@ This module defines various objects for recording the results of queries. """ + from enum import Enum @@ -10,11 +11,12 @@ class QueryStatus(Enum): Describes status of query about a given username. """ - CLAIMED = "Claimed" # Username Detected - AVAILABLE = "Available" # Username Not Detected - UNKNOWN = "Unknown" # Error Occurred While Trying To Detect Username - ILLEGAL = "Illegal" # Username Not Allowable For This Site - WAF = "WAF" # Request blocked by WAF (i.e. 
Cloudflare) + + CLAIMED = "Claimed" # Username Detected + AVAILABLE = "Available" # Username Not Detected + UNKNOWN = "Unknown" # Error Occurred While Trying To Detect Username + ILLEGAL = "Illegal" # Username Not Allowable For This Site + WAF = "WAF" # Request blocked by WAF (i.e. Cloudflare) def __str__(self): """Convert Object To String. @@ -27,13 +29,16 @@ def __str__(self): """ return self.value -class QueryResult(): + +class QueryResult: """Query Result Object. Describes result of query about a given username. """ - def __init__(self, username, site_name, site_url_user, status, - query_time=None, context=None): + + def __init__( + self, username, site_name, site_url_user, status, query_time=None, context=None + ): """Create Query Result Object. Contains information about a specific method of detecting usernames on @@ -62,12 +67,12 @@ def __init__(self, username, site_name, site_url_user, status, Nothing. """ - self.username = username - self.site_name = site_name + self.username = username + self.site_name = site_name self.site_url_user = site_url_user - self.status = status - self.query_time = query_time - self.context = context + self.status = status + self.query_time = query_time + self.context = context return diff --git a/sherlock_project/sherlock.py b/sherlock_project/sherlock.py index a284f47ad5..3bb13e5ee0 100644 --- a/sherlock_project/sherlock.py +++ b/sherlock_project/sherlock.py @@ -10,10 +10,12 @@ import sys try: - from sherlock_project.__init__ import import_error_test_var # noqa: F401 + from sherlock_project.__init__ import import_error_test_var # noqa: F401 except ImportError: print("Did you run Sherlock with `python3 sherlock/sherlock.py ...`?") - print("This is an outdated method. Please see https://sherlockproject.xyz/installation for up to date instructions.") + print( + "This is an outdated method. Please see https://sherlockproject.xyz/installation for up to date instructions." 
+ ) sys.exit(1) import csv @@ -21,6 +23,9 @@ import pandas as pd import os import re +import random +from concurrent.futures import as_completed +from tqdm import tqdm from argparse import ArgumentParser, RawDescriptionHelpFormatter from json import loads as json_loads from time import monotonic @@ -167,6 +172,19 @@ def multiple_usernames(username): return allUsernames +# Modern User-Agents for rotation to avoid bot detection +USER_AGENTS = [ + "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/129.0.0.0 Safari/537.36", + "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36", + "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/129.0.0.0 Safari/537.36", + "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:130.0) Gecko/20100101 Firefox/130.0", + "Mozilla/5.0 (Macintosh; Intel Mac OS X 14.7; rv:130.0) Gecko/20100101 Firefox/130.0", + "Mozilla/5.0 (iPhone; CPU iPhone OS 17_6_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.6 Mobile/15E148 Safari/604.1", + "Mozilla/5.0 (iPad; CPU OS 17_6_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.6 Mobile/15E148 Safari/604.1", + "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.6 Safari/605.1.15", +] + + def sherlock( username: str, site_data: dict[str, dict[str, str]], @@ -174,6 +192,7 @@ def sherlock( dump_response: bool = False, proxy: Optional[str] = None, timeout: int = 60, + max_workers: int = 20, ) -> dict[str, dict[str, str | QueryResult]]: """Run Sherlock Analysis. @@ -189,6 +208,7 @@ def sherlock( proxy -- String indicating the proxy URL timeout -- Time in seconds to wait before timing out request. Default is 60 seconds. + max_workers -- Number of parallel workers to use for the search. Return Value: Dictionary containing results from report. 
Key of dictionary is the name @@ -207,14 +227,22 @@ def sherlock( # Notify caller that we are starting the query. query_notify.start(username) + # Performance optimization: cache local references + # Accessing local variables is faster than global/module ones. + status_illegal = QueryStatus.ILLEGAL + status_unknown = QueryStatus.UNKNOWN + status_claimed = QueryStatus.CLAIMED + status_available = QueryStatus.AVAILABLE + status_waf = QueryStatus.WAF + + # Pre-calculate escaped username for use in all URLs + username_escaped = username.replace(" ", "%20") + # Normal requests underlying_session = requests.session() - # Limit number of workers to 20. - # This is probably vastly overkill. - if len(site_data) >= 20: - max_workers = 20 - else: + # Limit number of workers. + if len(site_data) < max_workers: max_workers = len(site_data) # Create multi-threaded session for all requests. @@ -225,17 +253,17 @@ def sherlock( # Results from analysis of all sites results_total = {} + # Create a mapping of futures to social network names + future_to_network = {} + # First create futures for all requests. This allows for the requests to run in parallel for social_network, net_info in site_data.items(): # Results from analysis of this specific site results_site = {"url_main": net_info.get("urlMain")} - # Record URL of main site - - # A user agent is needed because some sites don't return the correct - # information since they think that we are bots (Which we actually are...) 
+ # Use a random User-Agent for each request to avoid bot detection headers = { - "User-Agent": "Mozilla/5.0 (X11; Linux x86_64; rv:129.0) Gecko/20100101 Firefox/129.0", + "User-Agent": random.choice(USER_AGENTS) } if "headers" in net_info: @@ -243,19 +271,20 @@ def sherlock( headers.update(net_info["headers"]) # URL of user on site (if it exists) - url = interpolate_string(net_info["url"], username.replace(' ', '%20')) + url = interpolate_string(net_info["url"], username_escaped) # Don't make request if username is invalid for the site regex_check = net_info.get("regexCheck") if regex_check and re.search(regex_check, username) is None: # No need to do the check at the site: this username is not allowed. results_site["status"] = QueryResult( - username, social_network, url, QueryStatus.ILLEGAL + username, social_network, url, status_illegal ) results_site["url_user"] = "" results_site["http_status"] = "" results_site["response_text"] = "" query_notify.update(results_site["status"]) + results_total[social_network] = results_site else: # URL of user on site (if it exists) results_site["url_user"] = url @@ -329,176 +358,158 @@ def sherlock( json=request_payload, ) - # Store future in data for access later - net_info["request_future"] = future - - # Add this site's results into final dictionary with all the other results. 
- results_total[social_network] = results_site - - # Open the file containing account links - for social_network, net_info in site_data.items(): - # Retrieve results again - results_site = results_total.get(social_network) - - # Retrieve other site information again - url = results_site.get("url_user") - status = results_site.get("status") - if status is not None: - # We have already determined the user doesn't exist here - continue - - # Get the expected error type - error_type = net_info["errorType"] - if isinstance(error_type, str): - error_type: list[str] = [error_type] - - # Retrieve future and ensure it has finished - future = net_info["request_future"] - r, error_text, exception_text = get_response( - request_future=future, error_type=error_type, social_network=social_network - ) - - # Get response time for response of our request. - try: - response_time = r.elapsed - except AttributeError: - response_time = None - - # Attempt to get request information - try: - http_status = r.status_code - except Exception: - http_status = "?" - try: - response_text = r.text.encode(r.encoding or "UTF-8") - except Exception: - response_text = "" - - query_status = QueryStatus.UNKNOWN - error_context = None - - # As WAFs advance and evolve, they will occasionally block Sherlock and - # lead to false positives and negatives. Fingerprints should be added - # here to filter results that fail to bypass WAFs. Fingerprints should - # be highly targetted. Comment at the end of each fingerprint to - # indicate target and date fingerprinted. 
- WAFHitMsgs = [ - r'.loading-spinner{visibility:hidden}body.no-js .challenge-running{display:none}body.dark{background-color:#222;color:#d9d9d9}body.dark a{color:#fff}body.dark a:hover{color:#ee730a;text-decoration:underline}body.dark .lds-ring div{border-color:#999 transparent transparent}body.dark .font-red{color:#b20f03}body.dark', # 2024-05-13 Cloudflare - r'', # 2024-11-11 Cloudflare error page - r'AwsWafIntegration.forceRefreshToken', # 2024-11-11 Cloudfront (AWS) - r'{return l.onPageView}}),Object.defineProperty(r,"perimeterxIdentifiers",{enumerable:' # 2024-04-09 PerimeterX / Human Security - ] - - if error_text is not None: - error_context = error_text - - elif any(hitMsg in r.text for hitMsg in WAFHitMsgs): - query_status = QueryStatus.WAF + # Store future in mapping for access later + future_to_network[future] = social_network + results_total[social_network] = results_site + + # Optimization: move WAF fingerprints outside the results loop + WAFHitMsgs = [ + r".loading-spinner{visibility:hidden}body.no-js .challenge-running{display:none}body.dark{background-color:#222;color:#d9d9d9}body.dark a{color:#fff}body.dark a:hover{color:#ee730a;text-decoration:underline}body.dark .lds-ring div{border-color:#999 transparent transparent}body.dark .font-red{color:#b20f03}body.dark", # 2024-05-13 Cloudflare + r'', # 2024-11-11 Cloudflare error page + r"AwsWafIntegration.forceRefreshToken", # 2024-11-11 Cloudfront (AWS) + r'{return l.onPageView}}),Object.defineProperty(r,"perimeterxIdentifiers",{enumerable:', # 2024-04-09 PerimeterX / Human Security + ] + + # Process results as they complete with a progress bar + with tqdm(total=len(site_data), desc="Searching", unit="site", disable=dump_response) as progress_bar: + # Update progress for sites already skipped due to regex + progress_bar.update(len(site_data) - len(future_to_network)) + + for future in as_completed(future_to_network): + social_network = future_to_network[future] + net_info = site_data[social_network] + 
results_site = results_total[social_network] + + # Update progress bar + progress_bar.update(1) + + # Retrieve other site information again + url = results_site.get("url_user") + + # Get the expected error type + error_type = net_info["errorType"] + if isinstance(error_type, str): + error_type = [error_type] + + # Retrieve future results + r, error_text, exception_text = get_response( + request_future=future, error_type=error_type, social_network=social_network + ) - else: - if any(errtype not in ["message", "status_code", "response_url"] for errtype in error_type): - error_context = f"Unknown error type '{error_type}' for {social_network}" - query_status = QueryStatus.UNKNOWN - else: - if "message" in error_type: - # error_flag True denotes no error found in the HTML - # error_flag False denotes error found in the HTML - error_flag = True - errors = net_info.get("errorMsg") - # errors will hold the error message - # it can be string or list - # by isinstance method we can detect that - # and handle the case for strings as normal procedure - # and if its list we can iterate the errors - if isinstance(errors, str): - # Checks if the error message is in the HTML - # if error is present we will set flag to False - if errors in r.text: - error_flag = False - else: - # If it's list, it will iterate all the error message - for error in errors: - if error in r.text: - error_flag = False - break - if error_flag: - query_status = QueryStatus.CLAIMED - else: - query_status = QueryStatus.AVAILABLE - - if "status_code" in error_type and query_status is not QueryStatus.AVAILABLE: - error_codes = net_info.get("errorCode") - query_status = QueryStatus.CLAIMED - - # Type consistency, allowing for both singlets and lists in manifest - if isinstance(error_codes, int): - error_codes = [error_codes] - - if error_codes is not None and r.status_code in error_codes: - query_status = QueryStatus.AVAILABLE - elif r.status_code >= 300 or r.status_code < 200: - query_status = 
QueryStatus.AVAILABLE - - if "response_url" in error_type and query_status is not QueryStatus.AVAILABLE: - # For this detection method, we have turned off the redirect. - # So, there is no need to check the response URL: it will always - # match the request. Instead, we will ensure that the response - # code indicates that the request was successful (i.e. no 404, or - # forward to some odd redirect). - if 200 <= r.status_code < 300: - query_status = QueryStatus.CLAIMED - else: - query_status = QueryStatus.AVAILABLE - - if dump_response: - print("+++++++++++++++++++++") - print(f"TARGET NAME : {social_network}") - print(f"USERNAME : {username}") - print(f"TARGET URL : {url}") - print(f"TEST METHOD : {error_type}") + # Get response time for response of our request. try: - print(f"STATUS CODES : {net_info['errorCode']}") - except KeyError: - pass - print("Results...") + response_time = r.elapsed + except AttributeError: + response_time = None + + # Attempt to get request information try: - print(f"RESPONSE CODE : {r.status_code}") + http_status = r.status_code + response_text_content = r.text except Exception: - pass - try: - print(f"ERROR TEXT : {net_info['errorMsg']}") - except KeyError: - pass - print(">>>>> BEGIN RESPONSE TEXT") + http_status = '?' + response_text_content = '' + try: - print(r.text) + response_text = response_text_content.encode(r.encoding or 'UTF-8') except Exception: - pass - print("<<<<< END RESPONSE TEXT") - print("VERDICT : " + str(query_status)) - print("+++++++++++++++++++++") - - # Notify caller about results of query. 
- result: QueryResult = QueryResult( - username=username, - site_name=social_network, - site_url_user=url, - status=query_status, - query_time=response_time, - context=error_context, - ) - query_notify.update(result) + response_text = b'' - # Save status of request - results_site["status"] = result + query_status = status_unknown + error_context = None - # Save results from request - results_site["http_status"] = http_status - results_site["response_text"] = response_text + if error_text is not None: + error_context = error_text - # Add this site's results into final dictionary with all of the other results. - results_total[social_network] = results_site + elif any(hitMsg in response_text_content for hitMsg in WAFHitMsgs): + query_status = status_waf + else: + if any( + errtype not in ['message', 'status_code', 'response_url'] + for errtype in error_type + ): + error_context = ( + f'Unknown error type "{error_type}" for {social_network}' + ) + query_status = status_unknown + else: + if 'message' in error_type: + error_flag = True + errors = net_info.get('errorMsg') + if isinstance(errors, str): + if errors in response_text_content: + error_flag = False + else: + for error in errors: + if error in response_text_content: + error_flag = False + break + if error_flag: + query_status = status_claimed + else: + query_status = status_available + + if ( + 'status_code' in error_type + and query_status is not status_available + ): + error_codes = net_info.get('errorCode') + query_status = status_claimed + if isinstance(error_codes, int): + error_codes = [error_codes] + if error_codes is not None and http_status in error_codes: + query_status = status_available + elif http_status == '?' or http_status >= 300 or http_status < 200: + query_status = status_available + + if ( + 'response_url' in error_type + and query_status is not status_available + ): + if http_status != '?' 
and 200 <= http_status < 300: + query_status = status_claimed + else: + query_status = status_available + + if dump_response: + tqdm.write('+++++++++++++++++++++') + tqdm.write(f'TARGET NAME : {social_network}') + tqdm.write(f'USERNAME : {username}') + tqdm.write(f'TARGET URL : {url}') + tqdm.write(f'TEST METHOD : {error_type}') + try: + tqdm.write(f'STATUS CODES : {net_info['errorCode']}') + except KeyError: + pass + tqdm.write('Results...') + tqdm.write(f'RESPONSE CODE : {http_status}') + try: + tqdm.write(f'ERROR TEXT : {net_info['errorMsg']}') + except KeyError: + pass + tqdm.write('>>>>> BEGIN RESPONSE TEXT') + tqdm.write(response_text_content) + tqdm.write('<<<<< END RESPONSE TEXT') + tqdm.write('VERDICT : ' + str(query_status)) + tqdm.write('+++++++++++++++++++++') + + # Notify caller about results of query. + result = QueryResult( + username=username, + site_name=social_network, + site_url_user=url, + status=query_status, + query_time=response_time, + context=error_context, + ) + query_notify.update(result) + + # Save status of request + results_site['status'] = result + results_site['http_status'] = http_status + results_site['response_text'] = response_text + results_total[social_network] = results_site return results_total @@ -623,6 +634,16 @@ def main(): default=60, help="Time (in seconds) to wait for response to requests (Default: 60)", ) + parser.add_argument( + "--workers", + "-r", + action="store", + metavar="WORKERS", + dest="workers", + type=int, + default=20, + help="Number of parallel workers to use for the search (Default: 20)", + ) parser.add_argument( "--print-all", action="store_true", @@ -802,6 +823,9 @@ def main(): result=None, verbose=args.verbose, print_all=args.print_all, browse=args.browse ) + # Record start time + start_time = monotonic() + # Run report on all specified users. 
all_usernames = [] for username in args.username: @@ -818,6 +842,7 @@ def main(): dump_response=args.dump_response, proxy=args.proxy, timeout=args.timeout, + max_workers=args.workers, ) if args.output: @@ -915,8 +940,8 @@ def main(): { "username": usernames, "name": names, - "url_main": [f'=HYPERLINK(\"{u}\")' for u in url_main], - "url_user": [f'=HYPERLINK(\"{u}\")' for u in url_user], + "url_main": [f'=HYPERLINK("{u}")' for u in url_main], + "url_user": [f'=HYPERLINK("{u}")' for u in url_user], "exists": exists, "http_status": http_status, "response_time_s": response_time_s, @@ -925,7 +950,7 @@ def main(): DataFrame.to_excel(f"{username}.xlsx", sheet_name="sheet1", index=False) print() - query_notify.finish() + query_notify.finish(total_time=monotonic() - start_time) if __name__ == "__main__": diff --git a/sherlock_project/sites.py b/sherlock_project/sites.py index c42554bacc..df8d6abd68 100644 --- a/sherlock_project/sites.py +++ b/sherlock_project/sites.py @@ -3,17 +3,26 @@ This module supports storing information about websites. This is the raw data that will be used to search for usernames. """ + import json import requests import secrets - MANIFEST_URL = "https://data.sherlockproject.xyz" EXCLUSIONS_URL = "https://raw.githubusercontent.com/sherlock-project/sherlock/refs/heads/exclusions/false_positive_exclusions.txt" + class SiteInformation: - def __init__(self, name, url_home, url_username_format, username_claimed, - information, is_nsfw, username_unclaimed=secrets.token_urlsafe(10)): + def __init__( + self, + name, + url_home, + url_username_format, + username_claimed, + information, + is_nsfw, + username_unclaimed=secrets.token_urlsafe(10), + ): """Create Site Information Object. Contains information about a specific website. 
@@ -58,7 +67,7 @@ def __init__(self, name, url_home, url_username_format, username_claimed, self.username_claimed = username_claimed self.username_unclaimed = secrets.token_urlsafe(32) self.information = information - self.is_nsfw = is_nsfw + self.is_nsfw = is_nsfw return @@ -77,11 +86,11 @@ def __str__(self): class SitesInformation: def __init__( - self, - data_file_path: str|None = None, - honor_exclusions: bool = True, - do_not_exclude: list[str] = [], - ): + self, + data_file_path: str | None = None, + honor_exclusions: bool = True, + do_not_exclude: list[str] = [], + ): """Create Sites Information Object. Contains information about all supported websites. @@ -131,9 +140,10 @@ def __init__( ) if response.status_code != 200: - raise FileNotFoundError(f"Bad response while accessing " - f"data file URL '{data_file_path}'." - ) + raise FileNotFoundError( + f"Bad response while accessing " + f"data file URL '{data_file_path}'." + ) try: site_data = response.json() except Exception as error: @@ -153,11 +163,12 @@ def __init__( ) except FileNotFoundError: - raise FileNotFoundError(f"Problem while attempting to access " - f"data file '{data_file_path}'." - ) + raise FileNotFoundError( + f"Problem while attempting to access " + f"data file '{data_file_path}'." 
+ ) - site_data.pop('$schema', None) + site_data.pop("$schema", None) if honor_exclusions: try: @@ -187,21 +198,22 @@ def __init__( for site_name in site_data: try: - self.sites[site_name] = \ - SiteInformation(site_name, - site_data[site_name]["urlMain"], - site_data[site_name]["url"], - site_data[site_name]["username_claimed"], - site_data[site_name], - site_data[site_name].get("isNSFW",False) - - ) + self.sites[site_name] = SiteInformation( + site_name, + site_data[site_name]["urlMain"], + site_data[site_name]["url"], + site_data[site_name]["username_claimed"], + site_data[site_name], + site_data[site_name].get("isNSFW", False), + ) except KeyError as error: raise ValueError( f"Problem parsing json contents at '{data_file_path}': Missing attribute {error}." ) except TypeError: - print(f"Encountered TypeError parsing json contents for target '{site_name}' at {data_file_path}\nSkipping target.\n") + print( + f"Encountered TypeError parsing json contents for target '{site_name}' at {data_file_path}\nSkipping target.\n" + ) return @@ -221,7 +233,7 @@ def remove_nsfw_sites(self, do_not_remove: list = []): if self.sites[site].is_nsfw and site.casefold() not in do_not_remove: continue sites[site] = self.sites[site] - self.sites = sites + self.sites = sites def site_name_list(self): """Get Site Name List. 
diff --git a/tests/conftest.py b/tests/conftest.py index 69fce756c6..d2c3ec50d5 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -4,27 +4,46 @@ import pytest from sherlock_project.sites import SitesInformation + def fetch_local_manifest(honor_exclusions: bool = True) -> dict[str, dict[str, str]]: - sites_obj = SitesInformation(data_file_path=os.path.join(os.path.dirname(__file__), "../sherlock_project/resources/data.json"), honor_exclusions=honor_exclusions) - sites_iterable: dict[str, dict[str, str]] = {site.name: site.information for site in sites_obj} + sites_obj = SitesInformation( + data_file_path=os.path.join( + os.path.dirname(__file__), "../sherlock_project/resources/data.json" + ), + honor_exclusions=honor_exclusions, + ) + sites_iterable: dict[str, dict[str, str]] = { + site.name: site.information for site in sites_obj + } return sites_iterable + @pytest.fixture() def sites_obj(): - sites_obj = SitesInformation(data_file_path=os.path.join(os.path.dirname(__file__), "../sherlock_project/resources/data.json")) + sites_obj = SitesInformation( + data_file_path=os.path.join( + os.path.dirname(__file__), "../sherlock_project/resources/data.json" + ), + honor_exclusions=False, + ) yield sites_obj + @pytest.fixture(scope="session") def sites_info(): yield fetch_local_manifest() + @pytest.fixture(scope="session") def remote_schema(): - schema_url: str = 'https://raw.githubusercontent.com/sherlock-project/sherlock/master/sherlock_project/resources/data.schema.json' + schema_url: str = ( + "https://raw.githubusercontent.com/sherlock-project/sherlock/master/sherlock_project/resources/data.schema.json" + ) with urllib.request.urlopen(schema_url) as remoteschema: schemadat = json.load(remoteschema) yield schemadat + def pytest_addoption(parser): parser.addoption( "--chunked-sites", @@ -33,6 +52,7 @@ def pytest_addoption(parser): help="For tests utilizing chunked sites, include only the (comma-separated) site(s) specified.", ) + def 
pytest_generate_tests(metafunc): if "chunked_sites" in metafunc.fixturenames: sites_info = fetch_local_manifest(honor_exclusions=False) @@ -40,9 +60,12 @@ def pytest_generate_tests(metafunc): # Ingest and apply site selections site_filter: str | None = metafunc.config.getoption("--chunked-sites") if site_filter: - selected_sites: list[str] = [site.strip() for site in site_filter.split(",")] + selected_sites: list[str] = [ + site.strip() for site in site_filter.split(",") + ] sites_info = { - site: data for site, data in sites_info.items() + site: data + for site, data in sites_info.items() if site in selected_sites } diff --git a/tests/few_test_basic.py b/tests/few_test_basic.py index f704032263..9ee48f879a 100644 --- a/tests/few_test_basic.py +++ b/tests/few_test_basic.py @@ -1,7 +1,8 @@ import sherlock_project -#from sherlock.sites import SitesInformation -#local_manifest = data_file_path=os.path.join(os.path.dirname(__file__), "../sherlock/resources/data.json") +# from sherlock.sites import SitesInformation +# local_manifest = data_file_path=os.path.join(os.path.dirname(__file__), "../sherlock/resources/data.json") + def test_username_via_message(): sherlock_project.__main__("--version") diff --git a/tests/sherlock_interactives.py b/tests/sherlock_interactives.py index c28b9dc06a..b484a68a64 100644 --- a/tests/sherlock_interactives.py +++ b/tests/sherlock_interactives.py @@ -3,36 +3,39 @@ import re import subprocess + class Interactives: - def run_cli(args:str = "") -> str: + def run_cli(args: str = "") -> str: """Pass arguments to Sherlock as a normal user on the command line""" # Adapt for platform differences (Windows likes to be special) if platform.system() == "Windows": - command:str = f"py -m sherlock_project {args}" + command: str = f"py -m sherlock_project {args}" else: - command:str = f"sherlock {args}" + command: str = f"sherlock {args}" - proc_out:str = "" + proc_out: str = "" try: - proc_out = subprocess.check_output(command, shell=True, 
stderr=subprocess.STDOUT) + proc_out = subprocess.check_output( + command, shell=True, stderr=subprocess.STDOUT + ) return proc_out.decode() except subprocess.CalledProcessError as e: raise InteractivesSubprocessError(e.output.decode()) - def walk_sherlock_for_files_with(pattern: str) -> list[str]: """Check all files within the Sherlock package for matching patterns""" - pattern:re.Pattern = re.compile(pattern) - matching_files:list[str] = [] + pattern: re.Pattern = re.compile(pattern) + matching_files: list[str] = [] for root, dirs, files in os.walk("sherlock_project"): for file in files: - file_path = os.path.join(root,file) + file_path = os.path.join(root, file) if "__pycache__" in file_path: continue - with open(file_path, 'r', errors='ignore') as f: + with open(file_path, "r", errors="ignore") as f: if pattern.search(f.read()): matching_files.append(file_path) return matching_files + class InteractivesSubprocessError(Exception): pass diff --git a/tests/test_manifest.py b/tests/test_manifest.py index b73e92408f..b40bfa8970 100644 --- a/tests/test_manifest.py +++ b/tests/test_manifest.py @@ -3,17 +3,18 @@ import pytest from jsonschema import validate + def test_validate_manifest_against_local_schema(): """Ensures that the manifest matches the local schema, for situations where the schema is being changed.""" - json_relative: str = '../sherlock_project/resources/data.json' - schema_relative: str = '../sherlock_project/resources/data.schema.json' + json_relative: str = "../sherlock_project/resources/data.json" + schema_relative: str = "../sherlock_project/resources/data.schema.json" json_path: str = os.path.join(os.path.dirname(__file__), json_relative) schema_path: str = os.path.join(os.path.dirname(__file__), schema_relative) - with open(json_path, 'r') as f: + with open(json_path, "r") as f: jsondat = json.load(f) - with open(schema_path, 'r') as f: + with open(schema_path, "r") as f: schemadat = json.load(f) validate(instance=jsondat, schema=schemadat) @@ 
-22,18 +23,22 @@ def test_validate_manifest_against_local_schema(): @pytest.mark.online def test_validate_manifest_against_remote_schema(remote_schema): """Ensures that the manifest matches the remote schema, so as to not unexpectedly break clients.""" - json_relative: str = '../sherlock_project/resources/data.json' + json_relative: str = "../sherlock_project/resources/data.json" json_path: str = os.path.join(os.path.dirname(__file__), json_relative) - with open(json_path, 'r') as f: + with open(json_path, "r") as f: jsondat = json.load(f) validate(instance=jsondat, schema=remote_schema) + # Ensure that the expected values are beind returned by the site list -@pytest.mark.parametrize("target_name,target_expected_err_type", [ - ('GitHub', 'status_code'), - ('GitLab', 'message'), -]) -def test_site_list_iterability (sites_info, target_name, target_expected_err_type): - assert sites_info[target_name]['errorType'] == target_expected_err_type +@pytest.mark.parametrize( + "target_name,target_expected_err_type", + [ + ("GitHub", "status_code"), + ("GitLab", "message"), + ], +) +def test_site_list_iterability(sites_info, target_name, target_expected_err_type): + assert sites_info[target_name]["errorType"] == target_expected_err_type diff --git a/tests/test_probes.py b/tests/test_probes.py index 11fc8f8377..a344cb8626 100644 --- a/tests/test_probes.py +++ b/tests/test_probes.py @@ -5,7 +5,8 @@ from sherlock_project.sherlock import sherlock from sherlock_project.notify import QueryNotify from sherlock_project.result import QueryStatus -#from sherlock_interactives import Interactives + +# from sherlock_interactives import Interactives def simple_query(sites_info: dict, site: str, username: str) -> QueryStatus: @@ -16,90 +17,115 @@ def simple_query(sites_info: dict, site: str, username: str) -> QueryStatus: username=username, site_data=site_data, query_notify=query_notify, - )[site]['status'].status + )[ + site + ]["status"].status @pytest.mark.online class TestLiveTargets: 
"""Actively test probes against live and trusted targets""" + # Known positives should only use sites trusted to be reliable and unchanging - @pytest.mark.parametrize('site,username',[ - ('GitLab', 'ppfeister'), - ('AllMyLinks', 'blue'), - ]) + @pytest.mark.parametrize( + "site,username", + [ + ("GitLab", "ppfeister"), + ("AllMyLinks", "blue"), + ], + ) def test_known_positives_via_message(self, sites_info, site, username): - assert simple_query(sites_info=sites_info, site=site, username=username) is QueryStatus.CLAIMED - + assert ( + simple_query(sites_info=sites_info, site=site, username=username) + is QueryStatus.CLAIMED + ) # Known positives should only use sites trusted to be reliable and unchanging - @pytest.mark.parametrize('site,username',[ - ('GitHub', 'ppfeister'), - ('GitHub', 'sherlock-project'), - ('Docker Hub', 'ppfeister'), - ('Docker Hub', 'sherlock'), - ]) + @pytest.mark.parametrize( + "site,username", + [ + ("GitHub", "ppfeister"), + ("GitHub", "sherlock-project"), + ("Docker Hub", "ppfeister"), + ("Docker Hub", "sherlock"), + ], + ) def test_known_positives_via_status_code(self, sites_info, site, username): - assert simple_query(sites_info=sites_info, site=site, username=username) is QueryStatus.CLAIMED - + assert ( + simple_query(sites_info=sites_info, site=site, username=username) + is QueryStatus.CLAIMED + ) # Known positives should only use sites trusted to be reliable and unchanging - @pytest.mark.parametrize('site,username',[ - ('Keybase', 'blue'), - ('devRant', 'blue'), - ]) + @pytest.mark.parametrize( + "site,username", + [ + ("Keybase", "blue"), + ("devRant", "blue"), + ], + ) def test_known_positives_via_response_url(self, sites_info, site, username): - assert simple_query(sites_info=sites_info, site=site, username=username) is QueryStatus.CLAIMED - + assert ( + simple_query(sites_info=sites_info, site=site, username=username) + is QueryStatus.CLAIMED + ) # Randomly generate usernames of high length and test for positive availability # 
Randomly generated usernames should be simple alnum for simplicity and high # compatibility. Several attempts may be made ~just in case~ a real username is # generated. - @pytest.mark.parametrize('site,random_len',[ - ('GitLab', 255), - ('Codecademy', 30) - ]) + @pytest.mark.parametrize("site,random_len", [("GitLab", 255), ("Codecademy", 30)]) def test_likely_negatives_via_message(self, sites_info, site, random_len): num_attempts: int = 3 attempted_usernames: list[str] = [] status: QueryStatus = QueryStatus.CLAIMED for i in range(num_attempts): acceptable_types = string.ascii_letters + string.digits - random_handle = ''.join(random.choice(acceptable_types) for _ in range (random_len)) + random_handle = "".join( + random.choice(acceptable_types) for _ in range(random_len) + ) attempted_usernames.append(random_handle) - status = simple_query(sites_info=sites_info, site=site, username=random_handle) + status = simple_query( + sites_info=sites_info, site=site, username=random_handle + ) if status is QueryStatus.AVAILABLE: break - assert status is QueryStatus.AVAILABLE, f"Could not validate available username after {num_attempts} attempts with randomly generated usernames {attempted_usernames}." - + assert ( + status is QueryStatus.AVAILABLE + ), f"Could not validate available username after {num_attempts} attempts with randomly generated usernames {attempted_usernames}." # Randomly generate usernames of high length and test for positive availability # Randomly generated usernames should be simple alnum for simplicity and high # compatibility. Several attempts may be made ~just in case~ a real username is # generated. 
- @pytest.mark.parametrize('site,random_len',[ - ('GitHub', 39), - ('Docker Hub', 30) - ]) + @pytest.mark.parametrize("site,random_len", [("GitHub", 39), ("Docker Hub", 30)]) def test_likely_negatives_via_status_code(self, sites_info, site, random_len): num_attempts: int = 3 attempted_usernames: list[str] = [] status: QueryStatus = QueryStatus.CLAIMED for i in range(num_attempts): acceptable_types = string.ascii_letters + string.digits - random_handle = ''.join(random.choice(acceptable_types) for _ in range (random_len)) + random_handle = "".join( + random.choice(acceptable_types) for _ in range(random_len) + ) attempted_usernames.append(random_handle) - status = simple_query(sites_info=sites_info, site=site, username=random_handle) + status = simple_query( + sites_info=sites_info, site=site, username=random_handle + ) if status is QueryStatus.AVAILABLE: break - assert status is QueryStatus.AVAILABLE, f"Could not validate available username after {num_attempts} attempts with randomly generated usernames {attempted_usernames}." + assert ( + status is QueryStatus.AVAILABLE + ), f"Could not validate available username after {num_attempts} attempts with randomly generated usernames {attempted_usernames}." 
def test_username_illegal_regex(sites_info): - site: str = 'BitBucket' - invalid_handle: str = '*#$Y&*JRE' - pattern = re.compile(sites_info[site]['regexCheck']) + site: str = "BitBucket" + invalid_handle: str = "*#$Y&*JRE" + pattern = re.compile(sites_info[site]["regexCheck"]) # Ensure that the username actually fails regex before testing sherlock assert pattern.match(invalid_handle) is None - assert simple_query(sites_info=sites_info, site=site, username=invalid_handle) is QueryStatus.ILLEGAL - + assert ( + simple_query(sites_info=sites_info, site=site, username=invalid_handle) + is QueryStatus.ILLEGAL + ) diff --git a/tests/test_ux.py b/tests/test_ux.py index 3c62463b50..84d7ed414a 100644 --- a/tests/test_ux.py +++ b/tests/test_ux.py @@ -3,41 +3,56 @@ from sherlock_interactives import Interactives from sherlock_interactives import InteractivesSubprocessError + def test_remove_nsfw(sites_obj): - nsfw_target: str = 'Pornhub' + nsfw_target: str = "Pornhub" assert nsfw_target in {site.name: site.information for site in sites_obj} sites_obj.remove_nsfw_sites() assert nsfw_target not in {site.name: site.information for site in sites_obj} # Parametrized sites should *not* include Motherless, which is acting as the control -@pytest.mark.parametrize('nsfwsites', [ - ['Pornhub'], - ['Pornhub', 'Xvideos'], -]) +@pytest.mark.parametrize( + "nsfwsites", + [ + ["Pornhub"], + ["Pornhub", "Xvideos"], + ], +) def test_nsfw_explicit_selection(sites_obj, nsfwsites): for site in nsfwsites: assert site in {site.name: site.information for site in sites_obj} sites_obj.remove_nsfw_sites(do_not_remove=nsfwsites) for site in nsfwsites: assert site in {site.name: site.information for site in sites_obj} - assert 'Motherless' not in {site.name: site.information for site in sites_obj} + assert "Motherless" not in {site.name: site.information for site in sites_obj} + def test_wildcard_username_expansion(): - assert sherlock.check_for_parameter('test{?}test') is True - assert 
sherlock.check_for_parameter('test{.}test') is False - assert sherlock.check_for_parameter('test{}test') is False - assert sherlock.check_for_parameter('testtest') is False - assert sherlock.check_for_parameter('test{?test') is False - assert sherlock.check_for_parameter('test?}test') is False - assert sherlock.multiple_usernames('test{?}test') == ["test_test" , "test-test" , "test.test"] - - -@pytest.mark.parametrize('cliargs', [ - '', - '--site urghrtuight --egiotr', - '--', -]) + assert sherlock.check_for_parameter("test{?}test") is True + assert sherlock.check_for_parameter("test{.}test") is False + assert sherlock.check_for_parameter("test{}test") is False + assert sherlock.check_for_parameter("testtest") is False + assert sherlock.check_for_parameter("test{?test") is False + assert sherlock.check_for_parameter("test?}test") is False + assert sherlock.multiple_usernames("test{?}test") == [ + "test_test", + "test-test", + "test.test", + ] + + +@pytest.mark.parametrize( + "cliargs", + [ + "", + "--site urghrtuight --egiotr", + "--", + ], +) def test_no_usernames_provided(cliargs): - with pytest.raises(InteractivesSubprocessError, match=r"error: the following arguments are required: USERNAMES"): + with pytest.raises( + InteractivesSubprocessError, + match=r"error: the following arguments are required: USERNAMES", + ): Interactives.run_cli(cliargs) diff --git a/tests/test_validate_targets.py b/tests/test_validate_targets.py index 33922c5e9d..e35587f07b 100644 --- a/tests/test_validate_targets.py +++ b/tests/test_validate_targets.py @@ -6,27 +6,40 @@ from sherlock_project.notify import QueryNotify from sherlock_project.result import QueryResult, QueryStatus - -FALSE_POSITIVE_ATTEMPTS: int = 2 # Since the usernames are randomly generated, it's POSSIBLE that a real username can be hit -FALSE_POSITIVE_QUANTIFIER_UPPER_BOUND: int = 15 # If a pattern uses quantifiers such as `+` `*` or `{n,}`, limit the upper bound (0 to disable) -FALSE_POSITIVE_DEFAULT_PATTERN: str = 
r'^[a-zA-Z0-9]{7,20}$' # Used in absence of a regexCheck entry - - -def set_pattern_upper_bound(pattern: str, upper_bound: int = FALSE_POSITIVE_QUANTIFIER_UPPER_BOUND) -> str: +FALSE_POSITIVE_ATTEMPTS: int = ( + 2 # Since the usernames are randomly generated, it's POSSIBLE that a real username can be hit +) +FALSE_POSITIVE_QUANTIFIER_UPPER_BOUND: int = ( + 15 # If a pattern uses quantifiers such as `+` `*` or `{n,}`, limit the upper bound (0 to disable) +) +FALSE_POSITIVE_DEFAULT_PATTERN: str = ( + r"^[a-zA-Z0-9]{7,20}$" # Used in absence of a regexCheck entry +) + + +def set_pattern_upper_bound( + pattern: str, upper_bound: int = FALSE_POSITIVE_QUANTIFIER_UPPER_BOUND +) -> str: """Set upper bound for regex patterns that use quantifiers such as `+` `*` or `{n,}`.""" - def replace_upper_bound(match: re.Match) -> str: # type: ignore - lower_bound: int = int(match.group(1)) if match.group(1) else 0 # type: ignore + + def replace_upper_bound(match: re.Match) -> str: # type: ignore + lower_bound: int = int(match.group(1)) if match.group(1) else 0 # type: ignore nonlocal upper_bound - upper_bound = upper_bound if lower_bound < upper_bound else lower_bound # type: ignore # noqa: F823 - return f'{{{lower_bound},{upper_bound}}}' + upper_bound = upper_bound if lower_bound < upper_bound else lower_bound # type: ignore # noqa: F823 + return f"{{{lower_bound},{upper_bound}}}" - pattern = re.sub(r'(? QueryStatus: + +def false_positive_check( + sites_info: dict[str, dict[str, str]], site: str, pattern: str +) -> QueryStatus: """Check if a site is likely to produce false positives.""" status: QueryStatus = QueryStatus.UNKNOWN @@ -38,13 +51,15 @@ def false_positive_check(sites_info: dict[str, dict[str, str]], site: str, patte username=username, site_data=sites_info, query_notify=query_notify, - )[site]['status'] + )[site]["status"] - if not hasattr(result, 'status'): - raise TypeError(f"Result for site {site} does not have 'status' attribute. 
Actual result: {result}") - if type(result.status) is not QueryStatus: # type: ignore - raise TypeError(f"Result status for site {site} is not of type QueryStatus. Actual type: {type(result.status)}") # type: ignore - status = result.status # type: ignore + if not hasattr(result, "status"): + raise TypeError( + f"Result for site {site} does not have 'status' attribute. Actual result: {result}" + ) + if type(result.status) is not QueryStatus: # type: ignore + raise TypeError(f"Result status for site {site} is not of type QueryStatus. Actual type: {type(result.status)}") # type: ignore + status = result.status # type: ignore if status in (QueryStatus.AVAILABLE, QueryStatus.WAF): return status @@ -52,25 +67,30 @@ def false_positive_check(sites_info: dict[str, dict[str, str]], site: str, patte return status -def false_negative_check(sites_info: dict[str, dict[str, str]], site: str) -> QueryStatus: +def false_negative_check( + sites_info: dict[str, dict[str, str]], site: str +) -> QueryStatus: """Check if a site is likely to produce false negatives.""" status: QueryStatus = QueryStatus.UNKNOWN query_notify: QueryNotify = QueryNotify() result: QueryResult | str = sherlock( - username=sites_info[site]['username_claimed'], + username=sites_info[site]["username_claimed"], site_data=sites_info, query_notify=query_notify, - )[site]['status'] + )[site]["status"] - if not hasattr(result, 'status'): - raise TypeError(f"Result for site {site} does not have 'status' attribute. Actual result: {result}") - if type(result.status) is not QueryStatus: # type: ignore - raise TypeError(f"Result status for site {site} is not of type QueryStatus. Actual type: {type(result.status)}") # type: ignore - status = result.status # type: ignore + if not hasattr(result, "status"): + raise TypeError( + f"Result for site {site} does not have 'status' attribute. 
Actual result: {result}" + ) + if type(result.status) is not QueryStatus: # type: ignore + raise TypeError(f"Result status for site {site} is not of type QueryStatus. Actual type: {type(result.status)}") # type: ignore + status = result.status # type: ignore return status + @pytest.mark.validate_targets @pytest.mark.online class Test_All_Targets: @@ -81,7 +101,7 @@ def test_false_pos(self, chunked_sites: dict[str, dict[str, str]]): pattern: str for site in chunked_sites: try: - pattern = chunked_sites[site]['regexCheck'] + pattern = chunked_sites[site]["regexCheck"] except KeyError: pattern = FALSE_POSITIVE_DEFAULT_PATTERN @@ -89,12 +109,15 @@ def test_false_pos(self, chunked_sites: dict[str, dict[str, str]]): pattern = set_pattern_upper_bound(pattern) result: QueryStatus = false_positive_check(chunked_sites, site, pattern) - assert result is QueryStatus.AVAILABLE, f"{site} produced false positive with pattern {pattern}, result was {result}" + assert ( + result is QueryStatus.AVAILABLE + ), f"{site} produced false positive with pattern {pattern}, result was {result}" @pytest.mark.validate_targets_fn def test_false_neg(self, chunked_sites: dict[str, dict[str, str]]): """Iterate through all sites in the manifest to discover possible false-negative inducting targets.""" for site in chunked_sites: result: QueryStatus = false_negative_check(chunked_sites, site) - assert result is QueryStatus.CLAIMED, f"{site} produced false negative, result was {result}" - + assert ( + result is QueryStatus.CLAIMED + ), f"{site} produced false negative, result was {result}" diff --git a/tests/test_version.py b/tests/test_version.py index 2de64dddd0..ba3add659a 100644 --- a/tests/test_version.py +++ b/tests/test_version.py @@ -2,13 +2,14 @@ from sherlock_interactives import Interactives import sherlock_project + def test_versioning() -> None: # Ensure __version__ matches version presented to the user assert sherlock_project.__version__ in Interactives.run_cli("--version") # Ensure 
__init__ is single source of truth for __version__ in package # Temporarily allows sherlock.py so as to not trigger early upgrades - found:list = Interactives.walk_sherlock_for_files_with(r'__version__ *= *') - expected:list = [ + found: list = Interactives.walk_sherlock_for_files_with(r"__version__ *= *") + expected: list = [ # Normalization is REQUIRED for Windows ( / vs \ ) os.path.normpath("sherlock_project/__init__.py"), ]