Skip to content

Commit fa5a04e

Browse files
committed
keep debug log, rebuild sm image with released base dlc
1 parent 74be69b commit fa5a04e

File tree

2 files changed

+19
-20
lines changed

2 files changed

+19
-20
lines changed

dlc_developer_config.toml

Lines changed: 18 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -41,12 +41,12 @@ build_frameworks = ["pytorch"]
4141

4242

4343
# By default we build both training and inference containers. Set true/false values to determine which to build.
44-
build_training = true
45-
build_inference = false
44+
build_training = true
45+
build_inference = false
4646

4747
# Set do_build to "false" to skip builds and test the latest image built by this PR
4848
# Note: at least one build is required to set do_build to "false"
49-
do_build = true
49+
do_build = true
5050

5151
[notify]
5252
### Notify on test failures
@@ -57,24 +57,24 @@ notify_test_failures = false
5757

5858
[test]
5959
### On by default
60-
sanity_tests = true
61-
security_tests = true
62-
safety_check_test = true
63-
ecr_scan_allowlist_feature = true
64-
ecs_tests = true
65-
eks_tests = true
66-
ec2_tests = true
60+
sanity_tests = true
61+
security_tests = true
62+
safety_check_test = true
63+
ecr_scan_allowlist_feature = true
64+
ecs_tests = true
65+
eks_tests = true
66+
ec2_tests = true
6767
# Set ec2_benchmark_tests to true if you are preparing a benchmark-related PR
68-
ec2_benchmark_tests = true
68+
ec2_benchmark_tests = true
6969

7070
### Set ec2_tests_on_heavy_instances = true to be able to run any EC2 tests that use large/expensive instance types by
7171
### default. If false, these types of tests will be skipped while other tests will run as usual.
7272
### These tests are run in EC2 test jobs, so ec2_tests must be true if ec2_tests_on_heavy_instances is true.
7373
### Off by default (set to false)
74-
ec2_tests_on_heavy_instances = true
74+
ec2_tests_on_heavy_instances = true
7575
### SM specific tests
7676
### On by default
77-
sagemaker_local_tests = true
77+
sagemaker_local_tests = true
7878
### Set enable_ipv6 = true to run tests with IPv6-enabled resources
7979
### Off by default (set to false)
8080
enable_ipv6 = false
@@ -92,13 +92,13 @@ enable_ipv6 = false
9292
ipv6_vpc_name = ""
9393

9494
# run standard sagemaker remote tests from test/sagemaker_tests
95-
sagemaker_remote_tests = true
95+
sagemaker_remote_tests = true
9696
# run efa sagemaker tests
97-
sagemaker_efa_tests = true
97+
sagemaker_efa_tests = true
9898
# run release_candidate_integration tests
99-
sagemaker_rc_tests = true
99+
sagemaker_rc_tests = true
100100
# run sagemaker benchmark tests
101-
sagemaker_benchmark_tests = true
101+
sagemaker_benchmark_tests = true
102102

103103
# SM remote EFA test instance type
104104
sagemaker_remote_efa_instance_type = ""
@@ -122,7 +122,7 @@ use_scheduler = false
122122
dlc-pr-base = ""
123123

124124
# Standard Framework Training
125-
dlc-pr-pytorch-training = "pytorch/training/buildspec-2-8-sm.yml"
125+
dlc-pr-pytorch-training = "pytorch/training/buildspec-2-8-sm.yml"
126126
dlc-pr-tensorflow-2-training = ""
127127
dlc-pr-autogluon-training = ""
128128

pytorch/training/docker/2.8/py3/cu129/Dockerfile.gpu

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,7 @@ ARG GDRCOPY_VERSION=2.5
1111
ARG TE_VERSION=2.5
1212
ARG FLASH_ATTN_VERSION=2.8.2
1313

14-
# FROM public.ecr.aws/deep-learning-containers/base:12.9.1-gpu-py312-ubuntu22.04-ec2 AS base_image
15-
FROM 669063966089.dkr.ecr.us-west-2.amazonaws.com/pr-base:12.9.1-gpu-py312-cu129-ubuntu22.04-ec2-pr-5186-2025-08-19-23-05-36 AS base_image
14+
FROM public.ecr.aws/deep-learning-containers/base:12.9.1-gpu-py312-ubuntu22.04-ec2 AS base_image
1615
# base has EFA, PYTHON and CUDA 12.9
1716

1817
# This arg is required to stop docker build from waiting for region configuration while installing tz data from ubuntu 20

0 commit comments

Comments
 (0)