Skip to content

implement sigv4 signing for s3 downloads #21956

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
Apr 15, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions docs/notes/2.27.x.md
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,10 @@ Minor fixes:

- If a sandbox for executing mypy is preserved, the `__run.sh` script now refers to the main script by a relative path and [can thus be successfully executed](https://github.com/pantsbuild/pants/issues/22138).

#### S3

The S3 backend now uses Signature Version 4 for signing requests, allowing the use of KMS-encrypted objects in S3. HMACV1 signing can still be enabled by setting [the `[s3-url-handler].auth_signing` option](https://www.pantsbuild.org/2.27/reference/subsystems/s3-url-handler#auth_signing) to `hmacv1`.

#### Shell

The shell backend now has far less overhead when parsing shell imports on macOS: the shellcheck executable is now hard-linked into sandboxes, which side-steps Gatekeeper checks on macOS (when enabled, as they are by default) that made concurrent executions behave as if they were run sequentially.
Expand Down
117 changes: 111 additions & 6 deletions src/python/pants/backend/url_handlers/s3/integration_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
DownloadS3SchemeURL,
)
from pants.backend.url_handlers.s3.register import rules as s3_rules
from pants.backend.url_handlers.s3.subsystem import S3AuthSigning
from pants.engine.env_vars import EnvironmentVars, EnvironmentVarsRequest
from pants.engine.fs import Digest, FileDigest, NativeDownloadFile, Snapshot
from pants.engine.rules import QueryRule
Expand Down Expand Up @@ -44,11 +45,15 @@ def rule_runner() -> RuleRunner:
)


class NoCredentialsError(Exception):
    """Test stand-in installed as `botocore.exceptions.NoCredentialsError` on the fake botocore module.

    A dedicated subclass is used instead of the bare `Exception` so that the
    `except exceptions.NoCredentialsError` handler in the code under test does
    not also swallow unrelated errors (such as failed assertions).
    """

    pass


@pytest.fixture
def monkeypatch_botocore(monkeypatch):
def do_patching(expected_url):
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Renamed to match the parameter in the test cases below.

def do_patching(expected_auth_url):
botocore = SimpleNamespace()
botocore.exceptions = SimpleNamespace(NoCredentialsError=Exception)
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Use of Exception here was causing some tests to pass (through this catch

except exceptions.NoCredentialsError:
) that should have been failing. I've updated the cases.

botocore.exceptions = SimpleNamespace(NoCredentialsError=NoCredentialsError)

class FakeSession:
def __init__(self):
Expand All @@ -58,6 +63,9 @@ def __init__(self):
def set_config_variable(self, key, value):
self.config_vars.update({key: value})

def get_config_variable(self, key):
return self.config_vars.get(key) or "us-east-1"

def get_credentials(self):
if self.creds:
return self.creds
Expand Down Expand Up @@ -91,14 +99,24 @@ def load_credentials(self):
Credentials=FakeCredentials.create,
)

def fake_auth_ctor(creds):
def fake_auth_ctor(creds, service_name, region_name):
assert service_name == "s3"
assert region_name in ["us-east-1", "us-west-2"]

def add_auth(request):
assert request.url == expected_auth_url
request.headers["AUTH"] = "TOKEN"

return SimpleNamespace(add_auth=add_auth)

def fake_hmac_v1_auth_ctor(creds):
def add_auth(request):
request.url == expected_url
assert request.url == expected_auth_url
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

missing assert

request.headers["AUTH"] = "TOKEN"

return SimpleNamespace(add_auth=add_auth)

botocore.auth = SimpleNamespace(HmacV1Auth=fake_auth_ctor)
botocore.auth = SimpleNamespace(SigV4Auth=fake_auth_ctor, HmacV1Auth=fake_hmac_v1_auth_ctor)

monkeypatch.setitem(sys.modules, "botocore", botocore)

Expand All @@ -121,69 +139,149 @@ def new_init(self, **kwargs):


@pytest.mark.parametrize(
"request_url, expected_auth_url, expected_native_url, req_type",
"request_url, expected_auth_url, expected_native_url, req_type, auth_type",
[
(
"s3://bucket/keypart1/keypart2/file.txt",
"https://s3.amazonaws.com/bucket/keypart1/keypart2/file.txt",
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Under hmacv1, the URL that's signed is the S3 path-style URL (which can be different from the one the request is actually made with).

"https://bucket.s3.amazonaws.com/keypart1/keypart2/file.txt",
DownloadS3SchemeURL,
S3AuthSigning.HMACV1,
),
(
"s3://bucket/keypart1/keypart2/file.txt",
"https://bucket.s3.amazonaws.com/keypart1/keypart2/file.txt",
"https://bucket.s3.amazonaws.com/keypart1/keypart2/file.txt",
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For sigv4, the signed URL is the same URL we make the request with (virtual-hosted style).

DownloadS3SchemeURL,
S3AuthSigning.SIGV4,
),
(
"s3://bucket/keypart1/keypart2/file.txt?versionId=ABC123",
"https://s3.amazonaws.com/bucket/keypart1/keypart2/file.txt?versionId=ABC123",
"https://bucket.s3.amazonaws.com/keypart1/keypart2/file.txt?versionId=ABC123",
DownloadS3SchemeURL,
S3AuthSigning.HMACV1,
),
(
"s3://bucket/keypart1/keypart2/file.txt?versionId=ABC123",
"https://bucket.s3.amazonaws.com/keypart1/keypart2/file.txt?versionId=ABC123",
"https://bucket.s3.amazonaws.com/keypart1/keypart2/file.txt?versionId=ABC123",
DownloadS3SchemeURL,
S3AuthSigning.SIGV4,
),
# Path-style
(
"https://s3.amazonaws.com/bucket/keypart1/keypart2/file.txt",
"https://s3.amazonaws.com/bucket/keypart1/keypart2/file.txt",
"https://bucket.s3.amazonaws.com/keypart1/keypart2/file.txt",
DownloadS3AuthorityPathStyleURL,
S3AuthSigning.HMACV1,
),
(
"https://s3.amazonaws.com/bucket/keypart1/keypart2/file.txt",
"https://bucket.s3.amazonaws.com/keypart1/keypart2/file.txt",
"https://bucket.s3.amazonaws.com/keypart1/keypart2/file.txt",
DownloadS3AuthorityPathStyleURL,
S3AuthSigning.SIGV4,
),
(
"https://s3.amazonaws.com/bucket/keypart1/keypart2/file.txt?versionId=ABC123",
"https://s3.amazonaws.com/bucket/keypart1/keypart2/file.txt?versionId=ABC123",
"https://bucket.s3.amazonaws.com/keypart1/keypart2/file.txt?versionId=ABC123",
DownloadS3AuthorityPathStyleURL,
S3AuthSigning.HMACV1,
),
(
"https://s3.amazonaws.com/bucket/keypart1/keypart2/file.txt?versionId=ABC123",
"https://bucket.s3.amazonaws.com/keypart1/keypart2/file.txt?versionId=ABC123",
"https://bucket.s3.amazonaws.com/keypart1/keypart2/file.txt?versionId=ABC123",
DownloadS3AuthorityPathStyleURL,
S3AuthSigning.SIGV4,
),
(
"https://s3.us-west-2.amazonaws.com/bucket/keypart1/keypart2/file.txt",
"https://s3.us-west-2.amazonaws.com/bucket/keypart1/keypart2/file.txt",
"https://bucket.s3.us-west-2.amazonaws.com/keypart1/keypart2/file.txt",
DownloadS3AuthorityPathStyleURL,
S3AuthSigning.HMACV1,
),
(
"https://s3.us-west-2.amazonaws.com/bucket/keypart1/keypart2/file.txt",
"https://bucket.s3.us-west-2.amazonaws.com/keypart1/keypart2/file.txt",
"https://bucket.s3.us-west-2.amazonaws.com/keypart1/keypart2/file.txt",
DownloadS3AuthorityPathStyleURL,
S3AuthSigning.SIGV4,
),
(
"https://s3.us-west-2.amazonaws.com/bucket/keypart1/keypart2/file.txt?versionId=ABC123",
"https://s3.us-west-2.amazonaws.com/bucket/keypart1/keypart2/file.txt?versionId=ABC123",
"https://bucket.s3.us-west-2.amazonaws.com/keypart1/keypart2/file.txt?versionId=ABC123",
DownloadS3AuthorityPathStyleURL,
S3AuthSigning.HMACV1,
),
(
"https://s3.us-west-2.amazonaws.com/bucket/keypart1/keypart2/file.txt?versionId=ABC123",
"https://bucket.s3.us-west-2.amazonaws.com/keypart1/keypart2/file.txt?versionId=ABC123",
"https://bucket.s3.us-west-2.amazonaws.com/keypart1/keypart2/file.txt?versionId=ABC123",
DownloadS3AuthorityPathStyleURL,
S3AuthSigning.SIGV4,
),
# Virtual-hosted-style
(
"https://bucket.s3.amazonaws.com/keypart1/keypart2/file.txt",
"https://s3.amazonaws.com/bucket/keypart1/keypart2/file.txt",
"https://bucket.s3.amazonaws.com/keypart1/keypart2/file.txt",
DownloadS3AuthorityVirtualHostedStyleURL,
S3AuthSigning.HMACV1,
),
(
"https://bucket.s3.amazonaws.com/keypart1/keypart2/file.txt",
"https://bucket.s3.amazonaws.com/keypart1/keypart2/file.txt",
"https://bucket.s3.amazonaws.com/keypart1/keypart2/file.txt",
DownloadS3AuthorityVirtualHostedStyleURL,
S3AuthSigning.SIGV4,
),
(
"https://bucket.s3.amazonaws.com/keypart1/keypart2/file.txt?versionId=ABC123",
"https://s3.amazonaws.com/bucket/keypart1/keypart2/file.txt?versionId=ABC123",
"https://bucket.s3.amazonaws.com/keypart1/keypart2/file.txt?versionId=ABC123",
DownloadS3AuthorityVirtualHostedStyleURL,
S3AuthSigning.HMACV1,
),
(
"https://bucket.s3.amazonaws.com/keypart1/keypart2/file.txt?versionId=ABC123",
"https://bucket.s3.amazonaws.com/keypart1/keypart2/file.txt?versionId=ABC123",
"https://bucket.s3.amazonaws.com/keypart1/keypart2/file.txt?versionId=ABC123",
DownloadS3AuthorityVirtualHostedStyleURL,
S3AuthSigning.SIGV4,
),
(
"https://bucket.s3.amazonaws.com/keypart1/keypart2/file.txt?versionId=ABC123",
"https://s3.amazonaws.com/bucket/keypart1/keypart2/file.txt?versionId=ABC123",
"https://bucket.s3.amazonaws.com/keypart1/keypart2/file.txt?versionId=ABC123",
DownloadS3AuthorityVirtualHostedStyleURL,
S3AuthSigning.HMACV1,
),
(
"https://bucket.s3.amazonaws.com/keypart1/keypart2/file.txt?versionId=ABC123",
"https://bucket.s3.amazonaws.com/keypart1/keypart2/file.txt?versionId=ABC123",
"https://bucket.s3.amazonaws.com/keypart1/keypart2/file.txt?versionId=ABC123",
DownloadS3AuthorityVirtualHostedStyleURL,
S3AuthSigning.SIGV4,
),
(
"https://bucket.s3.us-west-2.amazonaws.com/keypart1/keypart2/file.txt",
"https://s3.us-west-2.amazonaws.com/bucket/keypart1/keypart2/file.txt",
"https://bucket.s3.us-west-2.amazonaws.com/keypart1/keypart2/file.txt",
DownloadS3AuthorityVirtualHostedStyleURL,
S3AuthSigning.HMACV1,
),
(
"https://bucket.s3.us-west-2.amazonaws.com/keypart1/keypart2/file.txt",
"https://bucket.s3.us-west-2.amazonaws.com/keypart1/keypart2/file.txt",
"https://bucket.s3.us-west-2.amazonaws.com/keypart1/keypart2/file.txt",
DownloadS3AuthorityVirtualHostedStyleURL,
S3AuthSigning.SIGV4,
),
],
)
Expand All @@ -194,6 +292,7 @@ def test_download_s3(
expected_auth_url: str,
expected_native_url: str,
req_type: type,
auth_type: S3AuthSigning,
replace_url,
) -> None:
class S3HTTPHandler(BaseHTTPRequestHandler):
Expand All @@ -213,6 +312,12 @@ def send_headers(self):
self.send_header("Content-Length", f"{len(self.response_text)}")
self.end_headers()

rule_runner.set_options(
args=[
f"--s3-url-handler-auth-signing={auth_type.value}",
],
)

monkeypatch_botocore(expected_auth_url)
with http_server(S3HTTPHandler) as port:
replace_url(expected_native_url, port)
Expand Down
88 changes: 65 additions & 23 deletions src/python/pants/backend/url_handlers/s3/register.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,12 @@
from typing import Any
from urllib.parse import urlsplit

from pants.backend.url_handlers.s3.subsystem import S3AuthSigning, S3Subsystem
from pants.engine.download_file import URLDownloadHandler
from pants.engine.env_vars import EnvironmentVars, EnvironmentVarsRequest
from pants.engine.environment import ChosenLocalEnvironmentName, EnvironmentName
from pants.engine.fs import Digest, NativeDownloadFile
from pants.engine.internals.native_engine import FileDigest
from pants.engine.internals.native_engine import EMPTY_FILE_DIGEST, FileDigest
from pants.engine.internals.selectors import Get
from pants.engine.rules import collect_rules, rule
from pants.engine.unions import UnionRule
Expand All @@ -21,13 +22,13 @@

CONTENT_TYPE = "binary/octet-stream"


logger = logging.getLogger(__name__)


@dataclass(frozen=True)
class AWSCredentials:
    """Resolved AWS credentials together with the session's configured region."""

    # Result of the credential resolver's `load_credentials()` call.
    creds: Any
    # The session's "region" config variable; used as the fallback signing
    # region for SigV4 when the request itself does not specify a region.
    default_region: str | None


@rule
Expand Down Expand Up @@ -92,8 +93,9 @@ async def access_aws_credentials(
)

creds = credentials.create_credential_resolver(session).load_credentials()
default_region = session.get_config_variable("region")

return AWSCredentials(creds)
return AWSCredentials(creds=creds, default_region=default_region)


@dataclass(frozen=True)
Expand All @@ -107,40 +109,80 @@ class S3DownloadFile:

@rule
async def download_from_s3(
request: S3DownloadFile, aws_credentials: AWSCredentials, global_options: GlobalOptions
request: S3DownloadFile,
aws_credentials: AWSCredentials,
global_options: GlobalOptions,
s3_subsystem: S3Subsystem,
) -> Digest:
from botocore import auth, compat, exceptions # pants: no-infer-dep

# NB: The URL for auth is expected to be in path-style
path_style_url = "https://s3"
virtual_hosted_url = f"https://{request.bucket}.s3.amazonaws.com/{request.key}"
if request.region:
path_style_url += f".{request.region}"
path_style_url += f".amazonaws.com/{request.bucket}/{request.key}"
virtual_hosted_url = (
f"https://{request.bucket}.s3.{request.region}.amazonaws.com/{request.key}"
)
if request.query:
path_style_url += f"?{request.query}"
virtual_hosted_url += f"?{request.query}"

headers = compat.HTTPHeaders()
http_request = SimpleNamespace(
url=path_style_url,
headers=headers,
method="GET",
auth_path=None,
)
signer = None
http_request = None

if s3_subsystem.auth_signing == S3AuthSigning.SIGV4:
# sigv4 uses the virtual_hosted_url for the auth request
http_request = SimpleNamespace(
url=virtual_hosted_url,
headers=headers,
method="GET",
auth_path=None,
data=None,
params={},
context={},
body={},
)

# Add x-amz-content-SHA256 as per boto code
# ref link - https://github.com/boto/botocore/blob/547b20801770c8ea4255ee9c3b809fea6b9f6bc4/botocore/auth.py#L52C1-L54C2
headers.add_header(
"X-Amz-Content-SHA256",
EMPTY_FILE_DIGEST.fingerprint,
)

# A region is required to sign the request with sigv4. If we don't know where the bucket is,
# default to the region from the credentials
signing_region = request.region or aws_credentials.default_region
if not signing_region:
raise Exception(
"An aws region is required to sign requests with sigv4. Please specify a region in the url or configure the default region in aws config or environment variables."
)

signer = auth.SigV4Auth(aws_credentials.creds, "s3", signing_region)

else:
assert s3_subsystem.auth_signing == S3AuthSigning.HMACV1
# NB: The URL for HmacV1 auth is expected to be in path-style
path_style_url = "https://s3"
if request.region:
path_style_url += f".{request.region}"
path_style_url += f".amazonaws.com/{request.bucket}/{request.key}"
if request.query:
path_style_url += f"?{request.query}"

http_request = SimpleNamespace(
url=path_style_url,
headers=headers,
method="GET",
auth_path=None,
)
signer = auth.HmacV1Auth(aws_credentials.creds)

# NB: The added Auth header doesn't need to be valid when accessing a public bucket. When
# hand-testing, you MUST test against a private bucket to ensure it works for private buckets too.
signer = auth.HmacV1Auth(aws_credentials.creds)
try:
signer.add_auth(http_request)
except exceptions.NoCredentialsError:
pass # The user can still access public S3 buckets without credentials

virtual_hosted_url = f"https://{request.bucket}.s3"
if request.region:
virtual_hosted_url += f".{request.region}"
virtual_hosted_url += f".amazonaws.com/{request.key}"
if request.query:
virtual_hosted_url += f"?{request.query}"

return await Get(
Digest,
NativeDownloadFile(
Expand Down
Loading
Loading