Skip to content

Commit d912e41

Browse files
committed
https://us-west-2.console.aws.amazon.com/cloudwatch/home?region=us-west-2#logsV2:log-groups/log-group/$252Faws$252Fcodebuild$252Fsagemaker-python-sdk-ci-integ-tests/log-events/e558697a-488d-4eab-a4ad-2971d9a1081f
1 parent 855a3c7 commit d912e41

3 files changed

Lines changed: 14 additions & 3 deletions

File tree

tests/integ/sagemaker/jumpstart/model/test_jumpstart_model.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@
3434
download_inference_assets,
3535
get_sm_session,
3636
get_tabular_data,
37+
x_fail_if_ice,
3738
)
3839

3940
INF2_SUPPORTED_REGIONS = {
@@ -192,6 +193,7 @@ def test_jumpstart_gated_model(setup):
192193
assert response is not None
193194

194195

196+
@x_fail_if_ice
195197
def test_jumpstart_gated_model_inference_component_enabled(setup):
196198

197199
model_id = "meta-textgeneration-llama-2-7b"

tests/integ/sagemaker/jumpstart/utils.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -80,7 +80,12 @@ def wrapper(*args, **kwargs):
8080
try:
8181
return func(*args, **kwargs)
8282
except Exception as e:
83-
if "CapacityError" in str(e):
83+
# Insufficient capacity is a transient, region-level AWS condition
84+
# (no instances available right now), not an SDK defect. SageMaker
85+
# surfaces it either as a "CapacityError" or as an endpoint failure
86+
# whose reason contains "InsufficientInstanceCapacity"; treat both as
87+
# an expected failure so canaries don't go red on capacity shortages.
88+
if "CapacityError" in str(e) or "InsufficientInstanceCapacity" in str(e):
8489
pytest.xfail(str(e))
8590
raise
8691

tests/integ/sagemaker/serve/test_serve_model_builder_inference_component_happy.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,11 @@
4141

4242
LLAMA_2_7B_JS_ID = "meta-textgeneration-llama-2-7b"
4343
LLAMA_IC_NAME = "llama2-mb-ic"
44-
INSTANCE_TYPE = "ml.g5.24xlarge"
44+
# ml.g5.24xlarge (4x A10G) is chronically capacity-constrained in us-west-2 and
45+
# made this test flaky with InsufficientInstanceCapacity / deploy timeouts. This
46+
# test exercises ModelBuilder's inference-component orchestration, not large-GPU
47+
# hosting, so a single-accelerator instance with ample capacity is sufficient.
48+
INSTANCE_TYPE = "ml.g5.2xlarge"
4549

4650

4751
@pytest.fixture
@@ -52,7 +56,7 @@ def model_builder_llama_inference_component():
5256
model_version="4.*",
5357
schema_builder=SchemaBuilder(sample_input, sample_output),
5458
resource_requirements=ResourceRequirements(
55-
requests={"memory": 98304, "num_accelerators": 4, "copies": 1, "num_cpus": 40}
59+
requests={"memory": 24576, "num_accelerators": 1, "copies": 1, "num_cpus": 8}
5660
),
5761
)
5862

0 commit comments

Comments
 (0)