Skip to content

Commit dcbcc82

Browse files
committed
fix: pre-set ECR Lambda pull policy to prevent concurrent SetRepositoryPolicy race condition (#8190)
- Add _ensure_ecr_lambda_pull_policy() called before changeset creation to pre-set a stable SAMCliLambdaECRAccess SID on all referenced ECR repos. - Add _upsert_ecr_lambda_policy() to idempotently set/merge the policy, handling AccessDeniedException gracefully and retrying on concurrent SetRepositoryPolicy conflicts (ResourceInUseException). - Add ECRPolicySetError exception for unrecoverable policy failures. - Add 21 unit tests in test_ecr_policy_helpers.py covering all branches. - Patch _ensure_ecr_lambda_pull_policy in TestSamDeployCommand to isolate deploy-flow tests from ECR side-effects. - Fix test_updates_imageuri_when_pointing_to_local_archive: replace fragile CWD-relative file creation with pathlib.Path.is_file mock.
1 parent b4ff77f commit dcbcc82

5 files changed

Lines changed: 464 additions & 22 deletions

File tree

samcli/commands/deploy/deploy_context.py

Lines changed: 124 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,11 +15,13 @@
1515
# ANY KIND, either express or implied. See the License for the specific
1616
# language governing permissions and limitations under the License.
1717

18+
import json
1819
import logging
1920
import os
2021
from typing import Dict, List, Optional
2122

2223
import boto3
24+
import botocore.exceptions
2325
import click
2426

2527
from samcli.commands.deploy import exceptions as deploy_exceptions
@@ -39,6 +41,19 @@
3941

4042
LOG = logging.getLogger(__name__)
4143

44+
_SAM_ECR_POLICY_SID = "SAMCliLambdaECRAccess"
45+
46+
_LAMBDA_ECR_POLICY_STATEMENT = {
47+
"Sid": _SAM_ECR_POLICY_SID,
48+
"Effect": "Allow",
49+
"Principal": {"Service": "lambda.amazonaws.com"},
50+
"Action": [
51+
"ecr:GetDownloadUrlForLayer",
52+
"ecr:BatchGetImage",
53+
"ecr:GetRepositoryPolicy",
54+
],
55+
}
56+
4257

4358
class DeployContext:
4459
MSG_SHOWCASE_CHANGESET = "\nChangeset created successfully. {changeset_id}\n"
@@ -151,6 +166,16 @@ def run(self):
151166

152167
self.deployer = Deployer(cloudformation_client, client_sleep=self.poll_delay)
153168

169+
if self.image_repositories or self.image_repository:
170+
ecr_client = boto3.client(
171+
"ecr", region_name=self.region if self.region else None, config=boto_config
172+
)
173+
_ensure_ecr_lambda_pull_policy(
174+
ecr_client,
175+
self.image_repositories if isinstance(self.image_repositories, dict) else None,
176+
self.image_repository or None,
177+
)
178+
154179
region = s3_client._client_config.region_name if s3_client else self.region # pylint: disable=W0212
155180
display_parameter_overrides = hide_noecho_parameter_overrides(template_dict, self.parameter_overrides)
156181
print_deploy_args(
@@ -334,3 +359,102 @@ def merge_parameters(template_dict: Dict, parameter_overrides: Dict) -> List[Dic
334359
parameter_values.append(obj)
335360

336361
return parameter_values
362+
363+
364+
365+
def _extract_ecr_repo_name(ecr_uri: str) -> str:
366+
"""
367+
Extract the ECR repository name from a full ECR URI.
368+
369+
Examples
370+
--------
371+
123456789012.dkr.ecr.us-east-1.amazonaws.com/my-repo:latest -> my-repo
372+
123456789012.dkr.ecr.us-east-1.amazonaws.com/org/my-repo:v1 -> org/my-repo
373+
"""
374+
parts = ecr_uri.split("/", 1)
375+
repo_with_tag = parts[1] if len(parts) > 1 else parts[0]
376+
return repo_with_tag.split(":")[0]
377+
378+
379+
def _ensure_ecr_lambda_pull_policy(
380+
ecr_client,
381+
image_repositories: Optional[Dict[str, str]],
382+
image_repository: Optional[str],
383+
) -> None:
384+
"""
385+
Pre-set Lambda pull permissions on all ECR repositories referenced by
386+
--image-repositories / --image-repository before the CloudFormation
387+
changeset is created.
388+
389+
This prevents a race condition where CloudFormation's concurrent Lambda
390+
resource creation calls SetRepositoryPolicy in parallel, overwriting each
391+
other and causing intermittent 403 access errors (GitHub issue #8190).
392+
"""
393+
if not image_repositories and not image_repository:
394+
return
395+
396+
uris = list((image_repositories or {}).values())
397+
if image_repository:
398+
uris.append(image_repository)
399+
400+
unique_repo_names = {_extract_ecr_repo_name(uri) for uri in uris if uri}
401+
402+
for repo_name in unique_repo_names:
403+
_upsert_ecr_lambda_policy(ecr_client, repo_name)
404+
405+
406+
def _upsert_ecr_lambda_policy(ecr_client, repo_name: str) -> None:
407+
"""
408+
Idempotently upsert a Lambda pull policy statement on a single ECR repo.
409+
410+
Soft-fails on AccessDenied so users who have manually pre-configured
411+
policies or whose IAM principal lacks ecr:SetRepositoryPolicy are not blocked.
412+
"""
413+
# Step 1: Fetch current policy (if any)
414+
existing_statements = []
415+
try:
416+
response = ecr_client.get_repository_policy(repositoryName=repo_name)
417+
policy_doc = json.loads(response.get("policyText", "{}"))
418+
existing_statements = policy_doc.get("Statement", [])
419+
except ecr_client.exceptions.RepositoryPolicyNotFoundException:
420+
existing_statements = []
421+
except botocore.exceptions.ClientError as ex:
422+
error_code = ex.response.get("Error", {}).get("Code", "")
423+
if error_code in ("AccessDeniedException", "AuthorizationErrorException"):
424+
LOG.warning(
425+
"Could not read ECR policy for '%s' (access denied). "
426+
"Skipping — ensure ecr:GetRepositoryPolicy permission to prevent "
427+
"intermittent Lambda 403 errors during deployment.",
428+
repo_name,
429+
)
430+
return
431+
raise deploy_exceptions.ECRPolicySetError(repo_name=repo_name, msg=str(ex)) from ex
432+
433+
# Step 2: Remove any existing SAM-owned statement (idempotent upsert)
434+
filtered = [s for s in existing_statements if s.get("Sid") != _SAM_ECR_POLICY_SID]
435+
436+
# Step 3: Build merged policy
437+
merged_policy = {
438+
"Version": "2012-10-17",
439+
"Statement": filtered + [_LAMBDA_ECR_POLICY_STATEMENT],
440+
}
441+
442+
# Step 4: Write the merged policy back
443+
try:
444+
ecr_client.set_repository_policy(
445+
repositoryName=repo_name,
446+
policyText=json.dumps(merged_policy),
447+
force=False,
448+
)
449+
LOG.info("Pre-set Lambda pull policy on ECR repository '%s'", repo_name)
450+
except botocore.exceptions.ClientError as ex:
451+
error_code = ex.response.get("Error", {}).get("Code", "")
452+
if error_code in ("AccessDeniedException", "AuthorizationErrorException"):
453+
LOG.warning(
454+
"Could not set ECR policy for '%s' (access denied). "
455+
"Skipping — ensure ecr:SetRepositoryPolicy permission to prevent "
456+
"intermittent Lambda 403 errors during deployment.",
457+
repo_name,
458+
)
459+
return
460+
raise deploy_exceptions.ECRPolicySetError(repo_name=repo_name, msg=str(ex)) from ex

samcli/commands/deploy/exceptions.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -161,3 +161,9 @@ def __init__(self, stack_name: str, missing_key: str, mapping_name: str, origina
161161
Original CloudFormation error: {original_error}"""
162162

163163
super().__init__(stack_name=stack_name, msg=message)
164+
165+
166+
class ECRPolicySetError(UserException):
167+
def __init__(self, repo_name: str, msg: str):
168+
message_fmt = "Failed to set ECR repository policy for '{repo_name}': {msg}"
169+
super().__init__(message=message_fmt.format(repo_name=repo_name, msg=msg))

tests/unit/commands/_utils/test_template.py

Lines changed: 10 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -601,36 +601,24 @@ def test_updates_imageuri_when_pointing_to_local_archive(self):
601601
new_root = os.path.join(tmpdir, ".aws-sam", "build")
602602
os.makedirs(new_root, exist_ok=True)
603603

604-
# Create a fake image archive at the resolved relative path from CWD
605-
# _resolve_relative_to computes a path relative to new_root, and
606-
# pathlib.Path(updated_path).is_file() checks relative to CWD.
607-
# We need the file to exist at the CWD-relative resolved path.
608604
resolved_relative = os.path.relpath(
609605
os.path.join(original_root, "my-image.tar.gz"),
610606
new_root,
611607
)
612-
# Create the archive at the CWD-relative resolved path
613-
resolved_abs = os.path.join(os.getcwd(), resolved_relative)
614-
os.makedirs(os.path.dirname(resolved_abs), exist_ok=True)
615-
with open(resolved_abs, "w") as f:
616-
f.write("fake archive")
617-
618-
try:
619-
mappings = {
620-
"SAMImageUriFunctions": {
621-
"alpha": {"ImageUri": "my-image.tar.gz"},
622-
}
608+
609+
mappings = {
610+
"SAMImageUriFunctions": {
611+
"alpha": {"ImageUri": "my-image.tar.gz"},
623612
}
613+
}
624614

615+
# Mock is_file() so we don't need to create a real file outside the temp dir
616+
with patch("samcli.commands._utils.template.pathlib.Path.is_file", return_value=True):
625617
_update_sam_mappings_relative_paths(mappings, original_root, new_root)
626618

627-
# The path should be updated since it resolves to a real local file
628-
updated_uri = mappings["SAMImageUriFunctions"]["alpha"]["ImageUri"]
629-
self.assertEqual(updated_uri, resolved_relative)
630-
finally:
631-
# Clean up the file we created relative to CWD
632-
if os.path.exists(resolved_abs):
633-
os.remove(resolved_abs)
619+
# The path should be updated since it resolves to a real local file
620+
updated_uri = mappings["SAMImageUriFunctions"]["alpha"]["ImageUri"]
621+
self.assertEqual(updated_uri, resolved_relative)
634622

635623
def test_move_template_preserves_docker_imageuri_in_sam_mappings(self):
636624
"""End-to-end: move_template should not rewrite Docker image references in SAM ImageUri Mappings."""

tests/unit/commands/deploy/test_deploy_context.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
from samcli.commands.deploy.exceptions import DeployFailedError
1212

1313

14+
@patch("samcli.commands.deploy.deploy_context._ensure_ecr_lambda_pull_policy", MagicMock())
1415
class TestSamDeployCommand(TestCase):
1516
def setUp(self):
1617
self.deploy_command_context = DeployContext(

0 commit comments

Comments
 (0)