Skip to content

Commit 77dc87c

Browse files
authored
Fix #1479
1 parent 70a7635 commit 77dc87c

4 files changed

Lines changed: 42 additions & 20 deletions

File tree

lib/connection/response.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@
3131
UNKNOWN,
3232
)
3333
from lib.parse.url import clean_path, parse_path
34-
from lib.utils.common import get_readable_size, is_binary, replace_from_all_encodings
34+
from lib.utils.common import get_readable_size, is_binary, replace_path
3535

3636

3737
class BaseResponse:
@@ -42,7 +42,7 @@ def __init__(self, url, response: requests.Response | httpx.Response) -> None:
4242
self.path = clean_path(self.full_path)
4343
self.status = response.status_code
4444
self.headers = response.headers
45-
self.redirect = self.headers.get("location") or ""
45+
self.redirect = self.headers.get("location", "")
4646
self.history = [str(res.url) for res in response.history]
4747
self.content = ""
4848
self.body = b""
@@ -68,7 +68,7 @@ def size(self) -> str:
6868
def __hash__(self) -> int:
6969
# Hash the static parts of the response only.
7070
# See https://github.com/maurosoria/dirsearch/pull/1436#issuecomment-2476390956
71-
body = replace_from_all_encodings(self.content, self.full_path.split("#")[0], "") if self.content else self.body
71+
body = replace_path(self.content, self.full_path.split("#")[0], "") if self.content else self.body
7272
return hash((self.status, body))
7373

7474
def __eq__(self, other: Any) -> bool:

lib/core/scanner.py

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@
3333
WILDCARD_TEST_POINT_MARKER,
3434
)
3535
from lib.parse.url import clean_path
36-
from lib.utils.common import replace_from_all_encodings
36+
from lib.utils.common import replace_path
3737
from lib.utils.diff import DynamicContentParser, generate_matching_regex
3838
from lib.utils.random import rand_string
3939

@@ -68,7 +68,7 @@ def check(self, path: str, response: BaseResponse) -> bool:
6868
and we get rid of queries/DOM in path as well because queries in path are usually
6969
reflected in the redirect as queries too (but we have already got rid of them).
7070
"""
71-
redirect = replace_from_all_encodings(
71+
redirect = replace_path(
7272
clean_path(response.redirect),
7373
clean_path(path),
7474
REFLECTED_PATH_MARKER,
@@ -111,17 +111,17 @@ def generate_redirect_regex(first_loc: str, first_path: str, second_loc: str, se
111111
112112
How it works:
113113
1. Replace path in 2 redirect URLs (if it gets reflected in) with a mark
114-
(e.g. /path1 -> /foo/path1 and /path2 -> /foo/path2 will become /foo/[mark] for both)
114+
(e.g. /path1 -> /foo/path1 and /path2 -> /foo/path2 will become /foo[mark] for both)
115115
2. Compare 2 redirects and generate a regex that matches both
116-
(e.g. /foo/[mark] and /foo/[mark] will have the regex: ^/foo/[mark]$)
116+
(e.g. /foo[mark] and /foo[mark] will have the regex: ^/foo[mark]$)
117117
3. To check if a redirect is wildcard, replace path with the mark and check if it matches this regex
118-
(e.g. /path3 -> /bar/path3, the redirect becomes /bar/[mark], which doesn't match the regex ^/foo/[mark]$)
118+
(e.g. /path3 -> /bar/path3, the redirect becomes /bar[mark], which doesn't match the regex ^/foo[mark]$)
119119
"""
120120

121121
if first_path:
122-
first_loc = first_loc.replace(first_path, REFLECTED_PATH_MARKER)
122+
first_loc = first_loc.replace("/" + first_path, REFLECTED_PATH_MARKER)
123123
if second_path:
124-
second_loc = second_loc.replace(second_path, REFLECTED_PATH_MARKER)
124+
second_loc = second_loc.replace("/" + second_path, REFLECTED_PATH_MARKER)
125125

126126
return generate_matching_regex(first_loc, second_loc)
127127

lib/utils/common.py

Lines changed: 21 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818

1919
import os
2020
import sys
21+
import re
2122

2223
from functools import reduce
2324
from json import dumps
@@ -136,13 +137,24 @@ def read_stdin():
136137
return buffer
137138

138139

139-
# Replace a substring from an HTML body, where the substring might be encoded
140+
# Replace a path from an HTML body, where the path might be encoded/decoded
140141
# in many different ways (URL encoding, HTML escaping, ...).
141-
def replace_from_all_encodings(string, to_replace, replace_with):
142-
string = string.replace(quote(to_replace), replace_with)
143-
string = string.replace(quote(quote(to_replace)), replace_with)
144-
string = string.replace(unquote(to_replace), replace_with)
145-
string = string.replace(unquote(unquote(to_replace)), replace_with)
146-
string = string.replace(escape(to_replace), replace_with)
147-
string = string.replace(dumps(to_replace), replace_with)
148-
return string.replace(to_replace, replace_with)
142+
#
143+
# Note:
144+
# - :path: argument must not start with a "/".
145+
# - The path in the body followed by a word character (alphanumeric or "_") won't
146+
# be replaced. For example, "abc" will be replaced from "abc def" but
147+
# not "abcdef".
148+
def replace_path(string, path, replace_with):
    """Replace every occurrence of a URL path in ``string``.

    The path is searched for in several encodings (URL-quoted once and
    twice, URL-unquoted once and twice, HTML-escaped, JSON-dumped and
    verbatim), always with a leading "/" prepended.

    An occurrence is replaced only when it is NOT immediately followed by a
    word character (``\\w``), so "/abc" is replaced in "/abc def" and
    "/abc?k=v" but not in "/abcdef".

    :param string: text to search (e.g. a response body or redirect URL).
    :param path: path to look for; must not start with "/".
    :param replace_with: literal replacement text.
    :return: ``string`` with all matching occurrences replaced.
    """

    def sub(string, to_replace, replace_with):
        # Escape the needle so characters such as "." or "?" in the path are
        # matched literally, then refuse matches that continue into a longer
        # word. "(?!\w)" is equivalent to "(?=[^\w]|$)".
        regex = re.escape(to_replace) + r"(?!\w)"
        # A callable replacement keeps ``replace_with`` literal: backslashes
        # and "\g<...>" sequences are not interpreted as regex templates.
        return re.sub(regex, lambda _match: replace_with, string)

    path = "/" + path
    # Same order as before: encoded forms first, the verbatim path last.
    variants = (
        quote(path),
        quote(quote(path)),
        unquote(path),
        unquote(unquote(path)),
        escape(path),
        dumps(path),
        path,
    )
    for variant in variants:
        string = sub(string, variant, replace_with)
    return string

tests/utils/test_common.py

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,10 +18,20 @@
1818

1919
from unittest import TestCase
2020

21-
from lib.utils.common import merge_path, strip_and_uniquify, get_valid_filename
21+
from lib.utils.common import (
22+
merge_path,
23+
replace_path,
24+
strip_and_uniquify,
25+
get_valid_filename,
26+
)
2227

2328

2429
class TestCommonUtils(TestCase):
30+
def test_replace_path(self):
    # The path is replaced when followed by a non-word character or the end
    # of the string.
    self.assertEqual(
        replace_path("/abc or /abc?k=v", "abc", "REPLACED"),
        "REPLACED or REPLACED?k=v",
        "Path was not replaced",
    )
    # The expected host must match the input host ("a.co"); only the path
    # part is substituted.
    self.assertEqual(
        replace_path("http://a.co/abc", "abc", "REPLACED"),
        "http://a.coREPLACED",
        "Path was not replaced",
    )
    # A path that is only a prefix of a longer word must be left untouched.
    self.assertEqual(
        replace_path("http://a.co/abcdef", "abc", "REPLACED"),
        "http://a.co/abcdef",
        "Path was replaced eventhough it should have not",
    )
34+
2535
def test_strip_and_uniquify(self):
2636
self.assertEqual(strip_and_uniquify(["foo", "bar", " bar ", "foo"]), ["foo", "bar"], "The results are not stripped or contain duplicates or in wrong order")
2737

0 commit comments

Comments
 (0)