Skip to content

Commit e7ca735

Browse files
committed
Enable PT 2.4 Training Autopatch
1 parent 04b4f9c commit e7ca735

File tree

2 files changed

+10
-99
lines changed

2 files changed

+10
-99
lines changed

dlc_developer_config.toml

Lines changed: 7 additions & 96 deletions
Original file line numberDiff line numberDiff line change
@@ -1,167 +1,78 @@
11
[dev]
2-
# Set to "huggingface", for example, if you are a huggingface developer. Default is ""
32
partner_developer = ""
4-
# Please only set it to true if you are preparing an EI related PR
5-
# Do remember to revert it back to false before merging any PR (including EI dedicated PR)
63
ei_mode = false
7-
# Please only set it to true if you are preparing a NEURON related PR
8-
# Do remember to revert it back to false before merging any PR (including NEURON dedicated PR)
94
neuron_mode = false
10-
# Please only set it to true if you are preparing a NEURONX related PR
11-
# Do remember to revert it back to false before merging any PR (including NEURONX dedicated PR)
125
neuronx_mode = false
13-
# Please only set it to true if you are preparing a GRAVITON related PR
14-
# Do remember to revert it back to false before merging any PR (including GRAVITON dedicated PR)
156
graviton_mode = false
16-
# Please only set it to true if you are preparing an ARM64 related PR
17-
# Do remember to revert it back to false before merging any PR (including ARM64 dedicated PR)
187
arm64_mode = false
19-
# Please only set it to true if you are preparing a HABANA related PR
20-
# Do remember to revert it back to false before merging any PR (including HABANA dedicated PR)
218
habana_mode = false
22-
# Please only set it to true if you are preparing a HUGGINGFACE TRCOMP related PR
23-
# Do remember to revert it back to false before merging any PR (including HUGGINGFACE TRCOMP dedicated PR)
24-
# This mode is used to build TF 2.6 and PT1.11 DLC
259
huggingface_trcomp_mode = false
26-
# Please only set it to true if you are preparing a TRCOMP related PR
27-
# Do remember to revert it back to false before merging any PR (including TRCOMP dedicated PR)
28-
# This mode is used to build PT1.12 and above DLC
2910
trcomp_mode = false
30-
# Set deep_canary_mode to true to simulate Deep Canary Test conditions on PR for all frameworks in the
31-
# build_frameworks list below. This will cause all image builds and non-deep-canary tests on the PR to be skipped,
32-
# regardless of whether they are enabled or disabled below.
33-
# Set graviton_mode/arm64_mode to true to run Deep Canaries on Graviton/ARM64 images.
34-
# Do remember to revert it back to false before merging any PR.
3511
deep_canary_mode = false
3612

3713
[build]
38-
# Add in frameworks you would like to build. By default, builds are disabled unless you specify building an image.
39-
# available frameworks - ["autogluon", "huggingface_tensorflow", "huggingface_pytorch", "huggingface_tensorflow_trcomp", "huggingface_pytorch_trcomp", "pytorch_trcomp", "tensorflow", "mxnet", "pytorch", "stabilityai_pytorch"]
40-
build_frameworks = []
41-
42-
# By default we build both training and inference containers. Set true/false values to determine which to build.
14+
build_frameworks = [ "pytorch",]
4315
build_training = true
44-
build_inference = true
45-
46-
# Set do_build to "false" to skip builds and test the latest image built by this PR
47-
# Note: at least one build is required to set do_build to "false"
16+
build_inference = false
4817
do_build = true
4918

5019
[notify]
51-
### Notify on test failures
52-
### Off by default
5320
notify_test_failures = false
54-
# Valid values: medium or high
55-
notification_severity = "medium"
21+
notification_severity = "medium"
5622

5723
[test]
58-
### On by default
5924
sanity_tests = true
6025
security_tests = true
61-
safety_check_test = false
62-
ecr_scan_allowlist_feature = false
26+
safety_check_test = false
27+
ecr_scan_allowlist_feature = false
6328
ecs_tests = true
6429
eks_tests = true
6530
ec2_tests = true
66-
# Set it to true if you are preparing a Benchmark related PR
6731
ec2_benchmark_tests = false
68-
69-
### Set ec2_tests_on_heavy_instances = true to be able to run any EC2 tests that use large/expensive instance types by
70-
### default. If false, these types of tests will be skipped while other tests will run as usual.
71-
### These tests are run in EC2 test jobs, so ec2_tests must be true if ec2_tests_on_heavy_instances is true.
72-
### Off by default (set to false)
7332
ec2_tests_on_heavy_instances = false
74-
75-
### SM specific tests
76-
### On by default
7733
sagemaker_local_tests = true
78-
79-
# run standard sagemaker remote tests from test/sagemaker_tests
8034
sagemaker_remote_tests = true
81-
# run efa sagemaker tests
8235
sagemaker_efa_tests = false
83-
# run release_candidate_integration tests
8436
sagemaker_rc_tests = false
85-
# run sagemaker benchmark tests
8637
sagemaker_benchmark_tests = false
87-
88-
# SM remote EFA test instance type
8938
sagemaker_remote_efa_instance_type = ""
90-
91-
# Run CI tests for nightly images
92-
# false by default
9339
nightly_pr_test_mode = false
94-
9540
use_scheduler = false
9641

9742
[buildspec_override]
98-
# Assign the path to the required buildspec file from the deep-learning-containers folder
99-
# For example:
100-
# dlc-pr-tensorflow-2-habana-training = "habana/tensorflow/training/buildspec-2-10.yml"
101-
# dlc-pr-pytorch-inference = "pytorch/inference/buildspec-1-12.yml"
102-
# Setting the buildspec file path to "" allows the image builder to choose the default buildspec file.
103-
104-
### TRAINING PR JOBS ###
105-
106-
# Standard Framework Training
10743
dlc-pr-mxnet-training = ""
108-
dlc-pr-pytorch-training = ""
44+
dlc-pr-pytorch-training = "pytorch/training/buildspec-2-4-sm.yml"
10945
dlc-pr-tensorflow-2-training = ""
11046
dlc-pr-autogluon-training = ""
111-
112-
# HuggingFace Training
11347
dlc-pr-huggingface-tensorflow-training = ""
11448
dlc-pr-huggingface-pytorch-training = ""
115-
116-
# Training Compiler
11749
dlc-pr-huggingface-pytorch-trcomp-training = ""
11850
dlc-pr-huggingface-tensorflow-2-trcomp-training = ""
11951
dlc-pr-pytorch-trcomp-training = ""
120-
121-
# Neuron Training
12252
dlc-pr-mxnet-neuron-training = ""
12353
dlc-pr-pytorch-neuron-training = ""
12454
dlc-pr-tensorflow-2-neuron-training = ""
125-
126-
# Stability AI Training
12755
dlc-pr-stabilityai-pytorch-training = ""
128-
129-
# Habana Training
13056
dlc-pr-pytorch-habana-training = ""
13157
dlc-pr-tensorflow-2-habana-training = ""
132-
133-
### INFERENCE PR JOBS ###
134-
135-
# Standard Framework Inference
13658
dlc-pr-mxnet-inference = ""
13759
dlc-pr-pytorch-inference = ""
13860
dlc-pr-tensorflow-2-inference = ""
13961
dlc-pr-autogluon-inference = ""
140-
141-
# Neuron Inference
14262
dlc-pr-mxnet-neuron-inference = ""
14363
dlc-pr-pytorch-neuron-inference = ""
14464
dlc-pr-tensorflow-1-neuron-inference = ""
14565
dlc-pr-tensorflow-2-neuron-inference = ""
146-
147-
# HuggingFace Inference
14866
dlc-pr-huggingface-tensorflow-inference = ""
14967
dlc-pr-huggingface-pytorch-inference = ""
15068
dlc-pr-huggingface-pytorch-neuron-inference = ""
151-
152-
# Stability AI Inference
15369
dlc-pr-stabilityai-pytorch-inference = ""
154-
155-
# Graviton Inference
15670
dlc-pr-mxnet-graviton-inference = ""
15771
dlc-pr-pytorch-graviton-inference = ""
15872
dlc-pr-tensorflow-2-graviton-inference = ""
159-
160-
# ARM64 Inference
16173
dlc-pr-pytorch-arm64-inference = ""
16274
dlc-pr-tensorflow-2-arm64-inference = ""
163-
164-
# EIA Inference
16575
dlc-pr-mxnet-eia-inference = ""
16676
dlc-pr-pytorch-eia-inference = ""
16777
dlc-pr-tensorflow-2-eia-inference = ""
78+

pytorch/training/buildspec-2-4-sm.yml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ framework: &FRAMEWORK pytorch
55
version: &VERSION 2.4.0
66
short_version: &SHORT_VERSION "2.4"
77
arch_type: x86
8-
autopatch_build: "False"
8+
autopatch_build: "True"
99

1010
repository_info:
1111
training_repository: &TRAINING_REPOSITORY
@@ -41,7 +41,7 @@ images:
4141
tag_python_version: &TAG_PYTHON_VERSION py311
4242
os_version: &OS_VERSION ubuntu22.04
4343
tag: !join [ *VERSION, "-", *DEVICE_TYPE, "-", *TAG_PYTHON_VERSION, "-", *OS_VERSION, "-sagemaker" ]
44-
# latest_release_tag: !join [ *VERSION, "-", *DEVICE_TYPE, "-", *TAG_PYTHON_VERSION, "-", *OS_VERSION, "-sagemaker" ]
44+
latest_release_tag: !join [ *VERSION, "-", *DEVICE_TYPE, "-", *TAG_PYTHON_VERSION, "-", *OS_VERSION, "-sagemaker" ]
4545
# build_tag_override: "beta:2.4.0-cpu-py311-ubuntu22.04-sagemaker"
4646
docker_file: !join [ docker/, *SHORT_VERSION, /, *DOCKER_PYTHON_VERSION, /Dockerfile., *DEVICE_TYPE ]
4747
target: sagemaker
@@ -57,7 +57,7 @@ images:
5757
cuda_version: &CUDA_VERSION cu124
5858
os_version: &OS_VERSION ubuntu22.04
5959
tag: !join [ *VERSION, "-", *DEVICE_TYPE, "-", *TAG_PYTHON_VERSION, "-", *CUDA_VERSION, "-", *OS_VERSION, "-sagemaker" ]
60-
# latest_release_tag: !join [ *VERSION, "-", *DEVICE_TYPE, "-", *TAG_PYTHON_VERSION, "-", *CUDA_VERSION, "-", *OS_VERSION, "-sagemaker" ]
60+
latest_release_tag: !join [ *VERSION, "-", *DEVICE_TYPE, "-", *TAG_PYTHON_VERSION, "-", *CUDA_VERSION, "-", *OS_VERSION, "-sagemaker" ]
6161
# build_tag_override: "beta:2.4.0-gpu-py311-cu124-ubuntu22.04-sagemaker"
6262
docker_file: !join [ docker/, *SHORT_VERSION, /, *DOCKER_PYTHON_VERSION, /, *CUDA_VERSION, /Dockerfile.,
6363
*DEVICE_TYPE ]

0 commit comments

Comments
 (0)