Skip to content

Commit 73dfc44

Browse files
author
Bhanu Teja Goshikonda
committed
Fix torch 2.10 version pinning and increase CPU image size baseline
- Split pip install into separate commands to prevent the dependency resolver from downgrading torch 2.10.0 to 2.9.1
- Add a torch version constraint when installing fastai/accelerate/spacy
- Increase CPU image_size_baseline from 7200 to 12000 in the buildspec files
1 parent 20a7fe6 commit 73dfc44

File tree

4 files changed

+28
-16
lines changed

4 files changed

+28
-16
lines changed

pytorch/training/buildspec-2-10-ec2.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ images:
4444
BuildEC2CPUPTTrainPy3DockerImage:
4545
<<: *TRAINING_REPOSITORY
4646
build: &PYTORCH_CPU_TRAINING_PY3 false
47-
image_size_baseline: 7200
47+
image_size_baseline: 12000
4848
device_type: &DEVICE_TYPE cpu
4949
python_version: &DOCKER_PYTHON_VERSION py3
5050
tag_python_version: &TAG_PYTHON_VERSION py313

pytorch/training/buildspec-2-10-sm.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ images:
4444
BuildSageMakerCPUPTTrainPy3DockerImage:
4545
<<: *TRAINING_REPOSITORY
4646
build: &PYTORCH_CPU_TRAINING_PY3 false
47-
image_size_baseline: 7200
47+
image_size_baseline: 12000
4848
device_type: &DEVICE_TYPE cpu
4949
python_version: &DOCKER_PYTHON_VERSION py3
5050
tag_python_version: &TAG_PYTHON_VERSION py313

pytorch/training/docker/2.10/py3/Dockerfile.cpu

Lines changed: 13 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -197,20 +197,26 @@ RUN pip install --no-cache-dir \
197197
pytz \
198198
tzdata
199199

200-
# Install PyTorch
200+
# Install PyTorch - split into separate commands to prevent dependency resolver from downgrading torch
201201
RUN pip install --no-cache-dir -U torch==${PYTORCH_VERSION} \
202202
torchvision==${TORCHVISION_VERSION} \
203203
torchaudio==${TORCHAUDIO_VERSION} \
204-
--index-url https://download.pytorch.org/whl/cpu \
205-
&& pip install --no-cache-dir -U torchtnt==${TORCHTNT_VERSION} \
204+
--index-url https://download.pytorch.org/whl/cpu
205+
206+
# Install torch ecosystem packages
207+
RUN pip install --no-cache-dir -U torchtnt==${TORCHTNT_VERSION} \
206208
torchdata==${TORCHDATA_VERSION} \
207-
s3torchconnector \
209+
s3torchconnector
210+
211+
# Install ML packages with torch version constraint to prevent downgrade
212+
# pin numpy requirement for fastai dependency
213+
# requires explicit declaration of spacy, thinc, blis
214+
# pin thinc due to incompatibility with numpy 1.26.4 (sagemaker doesn't support latest numpy)
215+
RUN pip install --no-cache-dir -U \
216+
"torch==${PYTORCH_VERSION}" \
208217
fastai \
209218
accelerate \
210-
# pin numpy requirement for fastai dependency
211-
# requires explicit declaration of spacy, thic, blis
212219
spacy \
213-
# pin thinc due to incompatibility with numpy 1.26.4 (sagemaker doesn't support latest numpy)
214220
thinc \
215221
blis \
216222
numpy \

pytorch/training/docker/2.10/py3/cu130/Dockerfile.gpu

Lines changed: 13 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -116,21 +116,27 @@ RUN pip install --no-cache-dir \
116116
pytz \
117117
tzdata
118118

119-
# Install PyTorch
119+
# Install PyTorch - split into separate commands to prevent dependency resolver from downgrading torch
120120
RUN pip install --no-cache-dir -U torch==${PYTORCH_VERSION} \
121121
torchvision==${TORCHVISION_VERSION} \
122122
torchaudio==${TORCHAUDIO_VERSION} \
123-
--index-url https://download.pytorch.org/whl/cu130 \
124-
&& pip install --no-cache-dir -U torchtnt==${TORCHTNT_VERSION} \
123+
--index-url https://download.pytorch.org/whl/cu130
124+
125+
# Install torch ecosystem packages
126+
RUN pip install --no-cache-dir -U torchtnt==${TORCHTNT_VERSION} \
125127
torchdata==${TORCHDATA_VERSION} \
126128
triton \
127-
s3torchconnector \
129+
s3torchconnector
130+
131+
# Install ML packages with torch version constraint to prevent downgrade
132+
# pin numpy requirement for fastai dependency
133+
# requires explicit declaration of spacy, thinc, blis
134+
# pin thinc due to incompatibility with numpy 1.26.4 (sagemaker doesn't support latest numpy)
135+
RUN pip install --no-cache-dir -U \
136+
"torch==${PYTORCH_VERSION}" \
128137
fastai \
129138
accelerate \
130-
# pin numpy requirement for fastai dependency
131-
# requires explicit declaration of spacy, thic, blis
132139
spacy \
133-
# pin thinc due to incompatibility with numpy 1.26.4 (sagemaker doesn't support latest numpy)
134140
thinc \
135141
blis \
136142
numpy \

0 commit comments

Comments
 (0)