Skip to content

Commit a776d28

Browse files
committed
links: fix url joining
1 parent 0725d88 commit a776d28

File tree

2 files changed: 25 additions and 4 deletions.

library/utils/web.py

Lines changed: 5 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -639,9 +639,11 @@ def construct_absolute_url(base_url, href):
639639
return href
640640

641641
if not up.netloc:
642-
if not base_url.endswith("/") and not href.startswith("/"):
643-
base_url += "/"
644-
642+
base_parsed = urlparse(base_url)
643+
path = base_parsed.path
644+
if not path.endswith("/") and path != "":
645+
path = path.rsplit("/", 1)[0] + "/"
646+
base_url = base_parsed._replace(path=path).geturl()
645647
href = urljoin(base_url, href)
646648

647649
if href.startswith("//"):

tests/utils/test_web.py

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
from bs4 import BeautifulSoup
55

66
from library.utils.path_utils import safe_unquote
7-
from library.utils.web import WebPath, extract_nearby_text, url_encode, url_to_local_path
7+
from library.utils.web import WebPath, construct_absolute_url, extract_nearby_text, url_encode, url_to_local_path
88
from tests.utils import p
99

1010

@@ -250,3 +250,22 @@ def test_parent_property():
250250
assert str(web_path.parent.parent.parent.parent.parent.parent) == "https://<netloc>/<path1>"
251251
assert str(web_path.parent.parent.parent.parent.parent.parent.parent) == "https://<netloc>"
252252
assert str(web_path.parent.parent.parent.parent.parent.parent.parent.parent) == "https://<netloc>"
253+
254+
@pytest.mark.parametrize(
255+
"base_url, href, expected",
256+
[
257+
("https://unli.xyz/diskprices/index.html", "./ch/", "https://unli.xyz/diskprices/ch/"),
258+
("https://unli.xyz/diskprices/index.html", "ch/", "https://unli.xyz/diskprices/ch/"),
259+
("https://unli.xyz/diskprices/index.html", "/ch/", "https://unli.xyz/ch/"),
260+
("https://unli.xyz/diskprices/", "ch/", "https://unli.xyz/diskprices/ch/"),
261+
("https://unli.xyz/diskprices", "ch/", "https://unli.xyz/ch/"),
262+
("https://unli.xyz", "diskprices/ch/", "https://unli.xyz/diskprices/ch/"),
263+
("https://unli.xyz", "/ch/", "https://unli.xyz/ch/"),
264+
("https://unli.xyz/", "//example.com/ch/", "https://example.com/ch/"),
265+
("https://unli.xyz/diskprices", "ftp://example.com/ch/", "ftp://example.com/ch/"),
266+
("https://unli.xyz/diskprices", "ssh://example.com/ch/", "ssh://example.com/ch/"),
267+
]
268+
)
269+
def test_construct_absolute_url(base_url, href, expected):
270+
result = construct_absolute_url(base_url, href)
271+
assert result == expected

0 commit comments

Comments
 (0)