Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@
_ALL_PYPI_FORMATS,
FAKEURL_PATHCOMPONENT,
AliasType,
_safe_netloc,
arch_id,
channel_from_url,
correct_splitext,
Expand Down Expand Up @@ -180,19 +181,21 @@ def make_partial_cache_url(cls, base_url: str, is_real_url: bool = True):
cls._ensure_class_per_type()
url = urlparse(base_url)

if is_real_url or url.netloc.split("/")[0] == FAKEURL_PATHCOMPONENT:
safe_netloc = _safe_netloc(url)

if is_real_url or safe_netloc.split("/")[0] == FAKEURL_PATHCOMPONENT:
return os.path.join(
cast(str, CONDA_PACKAGES_DIRNAME),
cls.TYPE,
url.netloc,
safe_netloc,
url.path.lstrip("/"),
)
else:
return os.path.join(
cast(str, CONDA_PACKAGES_DIRNAME),
cls.TYPE,
FAKEURL_PATHCOMPONENT,
url.netloc,
safe_netloc,
url.path.lstrip("/"),
)

Expand Down Expand Up @@ -365,11 +368,12 @@ def __init__(
# If it is not a real URL, add the FAKEURL_PATHCOMPONENT but only if not
# already there.
url_parse_result = urlparse(self._url)
if not url_parse_result.netloc.startswith(FAKEURL_PATHCOMPONENT):
safe_netloc = _safe_netloc(url_parse_result)
if not safe_netloc.startswith(FAKEURL_PATHCOMPONENT):
self._url = urlunparse(
(
url_parse_result.scheme,
os.path.join(FAKEURL_PATHCOMPONENT, url_parse_result.netloc),
os.path.join(FAKEURL_PATHCOMPONENT, safe_netloc),
url_parse_result.path,
url_parse_result.params,
url_parse_result.query,
Expand Down Expand Up @@ -502,9 +506,10 @@ def is_fetched(self, pkg_format: str) -> bool:
def is_downloadable_url(self, pkg_format: Optional[str] = None) -> bool:
if not pkg_format:
pkg_format = self._url_format
return pkg_format == self._url_format and not urlparse(
self._url
).netloc.startswith(FAKEURL_PATHCOMPONENT)
url_parsed = urlparse(self._url)

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ditto

return pkg_format == self._url_format and not _safe_netloc(
url_parsed
).startswith(FAKEURL_PATHCOMPONENT)

def is_derived(self) -> bool:
# If the filename component of the URL does not match the filename of this package,
Expand Down Expand Up @@ -776,7 +781,9 @@ def is_external_url(self, sources: Dict[str, List[str]]) -> bool:
# This is used to determine if we should consider a package as a local package to
# be downloaded before resolving the environment
pypi_sources = sources.get("pypi", [])
return not any(urlparse(source).netloc in self.url for source in pypi_sources)
return not any(
(urlparse(source).hostname or "") in self.url for source in pypi_sources
)


class ResolvedEnvironment:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -279,6 +279,7 @@ def resolve(
outfile_name,
"-k",
"explicit",
"--strip-auth",
"--conda",
]
# if "micromamba_server" in self._conda._bins:
Expand Down Expand Up @@ -358,7 +359,11 @@ def resolve(
base_build_url = components[4]
parse = urlparse(base_build_url)
clean_path, clean_commit = parse.path.split("@")
clean_url = parse.scheme[4:] + parse.netloc + clean_path
# Use hostname (not netloc) to avoid leaking embedded credentials
safe_netloc = parse.hostname or ""
if parse.port:
safe_netloc += ":%d" % parse.port
clean_url = parse.scheme[4:] + safe_netloc + clean_path
base_pkg_url = "%s/%s" % (clean_url, clean_commit)
# TODO: Do we need to handle subdirectories
cache_base_url = (
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
Union,
cast,
)
from urllib.parse import unquote, urlparse
from urllib.parse import unquote, urlparse, urlunparse

import metaflow.metaflow_config as mf_config
from metaflow._vendor.packaging.markers import Marker, default_environment
Expand Down Expand Up @@ -754,6 +754,34 @@ def channel_or_url(url: str) -> str:
return url


def strip_url_credentials(url: str) -> str:
"""Strip username:password from a URL, preserving everything else."""
parsed = urlparse(url)
if not parsed.username:
return url
netloc = parsed.hostname or ""
if parsed.port:
netloc += ":%d" % parsed.port
return urlunparse(
(
parsed.scheme,
netloc,
parsed.path,
parsed.params,
parsed.query,
parsed.fragment,
)
)


def _safe_netloc(url_parse_result) -> str:
"""Return netloc without credentials (hostname:port only)."""
netloc = url_parse_result.hostname or ""
if url_parse_result.port:
netloc += ":%d" % url_parse_result.port
return netloc


def auth_from_urls(urls: List[str]) -> Optional[AuthBase]:
auths_per_hostname = {
cast(str, h): HTTPBasicAuth(username, pwd)
Expand Down
80 changes: 80 additions & 0 deletions tests/plugins/conda/test_strip_url_credentials.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
"""Tests for strip_url_credentials and safe_netloc helpers in utils.py."""

import sys
import os

sys.path.insert(
0,
os.path.join(
os.path.dirname(__file__),
"..",
"..",
"..",
"metaflow-netflixext",
),
)

from urllib.parse import urlparse
from metaflow_extensions.nflx.plugins.conda.utils import (
strip_url_credentials,
_safe_netloc,
)


class TestStripUrlCredentials:
def test_no_credentials(self):
url = "https://pypi.org/simple/"
assert strip_url_credentials(url) == url

def test_user_and_password(self):
url = "https://user:token@private-pypi.example.com/simple/"
assert strip_url_credentials(url) == "https://private-pypi.example.com/simple/"

def test_user_only(self):
url = "https://user@private-pypi.example.com/simple/"
assert strip_url_credentials(url) == "https://private-pypi.example.com/simple/"

def test_preserves_port(self):
url = "https://user:token@private-pypi.example.com:8080/simple/"
assert (
strip_url_credentials(url)
== "https://private-pypi.example.com:8080/simple/"
)

def test_preserves_query_and_fragment(self):
url = "https://user:pass@host.com/path?q=1#frag"
assert strip_url_credentials(url) == "https://host.com/path?q=1#frag"

def test_file_url_unchanged(self):
url = "file:///tmp/packages/foo-1.0.tar.gz"
assert strip_url_credentials(url) == url

def test_empty_string(self):
assert strip_url_credentials("") == ""

def test_git_plus_https(self):
url = "git+https://token:x-oauth@github.com/org/repo.git@abc123"
result = strip_url_credentials(url)
assert "token" not in result
assert "x-oauth" not in result
assert "github.com" in result


class TestSafeNetloc:
def test_simple_host(self):
parsed = urlparse("https://pypi.org/simple/")
assert _safe_netloc(parsed) == "pypi.org"

def test_host_with_port(self):
parsed = urlparse("https://pypi.org:8080/simple/")
assert _safe_netloc(parsed) == "pypi.org:8080"

def test_credentials_stripped(self):
parsed = urlparse("https://user:token@private.com/simple/")
assert _safe_netloc(parsed) == "private.com"
assert "user" not in _safe_netloc(parsed)
assert "token" not in _safe_netloc(parsed)

def test_credentials_with_port_stripped(self):
parsed = urlparse("https://user:token@private.com:443/simple/")
assert _safe_netloc(parsed) == "private.com:443"
Loading