https://us-west-2.console.aws.amazon.com/cloudwatch/home?region=us-west-2#logsV2:log-groups/log-group/$252Faws$252Fcodebuild$252Fsagemaker-python-sdk-ci-integ-tests/log-events/e558697a-488d-4eab-a4ad-2971d9a1081f

lucasjia-aws · lucasjia-aws · commit d912e41abf5c · 2026-06-06T20:17:00.000-07:00
diff --git a/tests/integ/sagemaker/jumpstart/model/test_jumpstart_model.py b/tests/integ/sagemaker/jumpstart/model/test_jumpstart_model.py
@@ -34,6 +34,7 @@
     download_inference_assets,
     get_sm_session,
     get_tabular_data,
+    x_fail_if_ice,
 )
 
 INF2_SUPPORTED_REGIONS = {
@@ -192,6 +193,7 @@ def test_jumpstart_gated_model(setup):
     assert response is not None
 
 
+@x_fail_if_ice
 def test_jumpstart_gated_model_inference_component_enabled(setup):
 
     model_id = "meta-textgeneration-llama-2-7b"
diff --git a/tests/integ/sagemaker/jumpstart/utils.py b/tests/integ/sagemaker/jumpstart/utils.py
@@ -80,7 +80,12 @@ def wrapper(*args, **kwargs):
         try:
             return func(*args, **kwargs)
         except Exception as e:
-            if "CapacityError" in str(e):
+            # Insufficient capacity is a transient, region-level AWS condition
+            # (no instances available right now), not an SDK defect. SageMaker
+            # surfaces it either as a "CapacityError" or as an endpoint failure
+            # whose reason contains "InsufficientInstanceCapacity"; treat both as
+            # an expected failure so canaries don't go red on capacity shortages.
+            if "CapacityError" in str(e) or "InsufficientInstanceCapacity" in str(e):
                 pytest.xfail(str(e))
             raise
 
diff --git a/tests/integ/sagemaker/serve/test_serve_model_builder_inference_component_happy.py b/tests/integ/sagemaker/serve/test_serve_model_builder_inference_component_happy.py
@@ -41,7 +41,11 @@
 
 LLAMA_2_7B_JS_ID = "meta-textgeneration-llama-2-7b"
 LLAMA_IC_NAME = "llama2-mb-ic"
-INSTANCE_TYPE = "ml.g5.24xlarge"
+# ml.g5.24xlarge (4x A10G) is chronically capacity-constrained in us-west-2 and
+# made this test flaky with InsufficientInstanceCapacity / deploy timeouts. This
+# test exercises ModelBuilder's inference-component orchestration, not large-GPU
+# hosting, so a single-accelerator instance with ample capacity is sufficient.
+INSTANCE_TYPE = "ml.g5.2xlarge"
 
 
 @pytest.fixture
@@ -52,7 +56,7 @@ def model_builder_llama_inference_component():
         model_version="4.*",
         schema_builder=SchemaBuilder(sample_input, sample_output),
         resource_requirements=ResourceRequirements(
-            requests={"memory": 98304, "num_accelerators": 4, "copies": 1, "num_cpus": 40}
+            requests={"memory": 24576, "num_accelerators": 1, "copies": 1, "num_cpus": 8}
         ),
     )