Skip to content
4 changes: 2 additions & 2 deletions pytorch/training/buildspec-2-7-sm.yml
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ images:
tag_python_version: &TAG_PYTHON_VERSION py312
os_version: &OS_VERSION ubuntu22.04
tag: !join [ *VERSION, "-", *DEVICE_TYPE, "-", *TAG_PYTHON_VERSION, "-", *OS_VERSION, "-sagemaker" ]
latest_release_tag: !join [ *VERSION, "-", *DEVICE_TYPE, "-", *TAG_PYTHON_VERSION, "-", *OS_VERSION, "-sagemaker" ]
# latest_release_tag: !join [ *VERSION, "-", *DEVICE_TYPE, "-", *TAG_PYTHON_VERSION, "-", *OS_VERSION, "-sagemaker" ]
# skip_build: "False"
docker_file: !join [ docker/, *SHORT_VERSION, /, *DOCKER_PYTHON_VERSION, /Dockerfile., *DEVICE_TYPE ]
target: sagemaker
Expand All @@ -66,7 +66,7 @@ images:
cuda_version: &CUDA_VERSION cu128
os_version: &OS_VERSION ubuntu22.04
tag: !join [ *VERSION, "-", *DEVICE_TYPE, "-", *TAG_PYTHON_VERSION, "-", *CUDA_VERSION, "-", *OS_VERSION, "-sagemaker" ]
latest_release_tag: !join [ *VERSION, "-", *DEVICE_TYPE, "-", *TAG_PYTHON_VERSION, "-", *CUDA_VERSION, "-", *OS_VERSION, "-sagemaker" ]
# latest_release_tag: !join [ *VERSION, "-", *DEVICE_TYPE, "-", *TAG_PYTHON_VERSION, "-", *CUDA_VERSION, "-", *OS_VERSION, "-sagemaker" ]
# skip_build: "False"
docker_file: !join [ docker/, *SHORT_VERSION, /, *DOCKER_PYTHON_VERSION, /, *CUDA_VERSION, /Dockerfile.,
*DEVICE_TYPE ]
Expand Down
4 changes: 2 additions & 2 deletions pytorch/training/buildspec-2-8-sm.yml
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ images:
tag_python_version: &TAG_PYTHON_VERSION py312
os_version: &OS_VERSION ubuntu22.04
tag: !join [ *VERSION, "-", *DEVICE_TYPE, "-", *TAG_PYTHON_VERSION, "-", *OS_VERSION, "-sagemaker" ]
latest_release_tag: !join [ *VERSION, "-", *DEVICE_TYPE, "-", *TAG_PYTHON_VERSION, "-", *OS_VERSION, "-sagemaker" ]
# latest_release_tag: !join [ *VERSION, "-", *DEVICE_TYPE, "-", *TAG_PYTHON_VERSION, "-", *OS_VERSION, "-sagemaker" ]
# skip_build: "False"
docker_file: !join [ docker/, *SHORT_VERSION, /, *DOCKER_PYTHON_VERSION, /Dockerfile., *DEVICE_TYPE ]
target: sagemaker
Expand All @@ -66,7 +66,7 @@ images:
cuda_version: &CUDA_VERSION cu129
os_version: &OS_VERSION ubuntu22.04
tag: !join [ *VERSION, "-", *DEVICE_TYPE, "-", *TAG_PYTHON_VERSION, "-", *CUDA_VERSION, "-", *OS_VERSION, "-sagemaker" ]
latest_release_tag: !join [ *VERSION, "-", *DEVICE_TYPE, "-", *TAG_PYTHON_VERSION, "-", *CUDA_VERSION, "-", *OS_VERSION, "-sagemaker" ]
# latest_release_tag: !join [ *VERSION, "-", *DEVICE_TYPE, "-", *TAG_PYTHON_VERSION, "-", *CUDA_VERSION, "-", *OS_VERSION, "-sagemaker" ]
# skip_build: "False"
docker_file: !join [ docker/, *SHORT_VERSION, /, *DOCKER_PYTHON_VERSION, /, *CUDA_VERSION, /Dockerfile.,
*DEVICE_TYPE ]
Expand Down
8 changes: 4 additions & 4 deletions pytorch/training/docker/2.7/py3/Dockerfile.cpu
Original file line number Diff line number Diff line change
Expand Up @@ -251,7 +251,7 @@ RUN pip install --no-cache-dir -U torch==${PYTORCH_VERSION} \
# thinc 8.3.6 is not compatible with numpy 1.26.4 (sagemaker doesn't support the latest numpy)
thinc==8.3.4 \
blis \
typer \
typer \
langcodes \
language_data \
numpy \
Expand Down Expand Up @@ -325,16 +325,16 @@ RUN pip install --no-cache-dir -U torch==${PYTORCH_VERSION} \
thinc==8.3.4 \
blis \
numpy \
typer \
typer \
langcodes \
language_data \
&& pip uninstall -y dataclasses

# Install SM packages
RUN pip install --no-cache-dir -U \
# Address package regression caused by the smclarify dependency s3fs
"awscli<=1.42.61" \
"boto3<=1.40.61" \
awscli \
boto3 \
smclarify \
"sagemaker>=2.9.0,<3" \
"sagemaker-experiments<1" \
Expand Down
10 changes: 5 additions & 5 deletions pytorch/training/docker/2.7/py3/cu128/Dockerfile.gpu
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ RUN pip install --no-cache-dir -U torch==${PYTORCH_VERSION} \
--index-url https://download.pytorch.org/whl/cu128 \
&& pip install --no-cache-dir -U torchtnt==${TORCHTNT_VERSION} \
torchdata==${TORCHDATA_VERSION} \
triton
triton

# Install GDRCopy which is a dependency of SM Distributed DataParallel binary
# The test binaries require the CUDA driver library, which can be found in conda
Expand Down Expand Up @@ -126,7 +126,7 @@ RUN pip install --no-cache-dir \
psutil \
ipython \
ipykernel \
"pillow>=11.3.0" \
"pillow>=12.1.1" \
h5py \
"fsspec>=2025.7.0" \
"markupsafe>=3.0.2" \
Expand All @@ -152,7 +152,7 @@ RUN pip install --no-cache-dir \
# thinc 8.3.6 is not compatible with numpy 1.26.4 (sagemaker doesn't support the latest numpy)
"thinc==8.3.4" \
blis \
typer \
typer \
langcodes \
language_data \
"jinja2>=3.1.6"\
Expand Down Expand Up @@ -204,8 +204,8 @@ RUN chmod +x /usr/local/bin/start_with_right_hostname.sh
# Install SM packages
RUN pip install --no-cache-dir -U \
# Address package regression caused by the smclarify dependency s3fs
"awscli<=1.42.61" \
"boto3<=1.40.61" \
awscli \
boto3 \
smclarify \
"sagemaker>=2.9.0,<3" \
"sagemaker-experiments<1" \
Expand Down
4 changes: 2 additions & 2 deletions pytorch/training/docker/2.8/py3/Dockerfile.cpu
Original file line number Diff line number Diff line change
Expand Up @@ -287,7 +287,7 @@ WORKDIR /
# Install SM packages
RUN pip install --no-cache-dir -U \
smclarify \
"sagemaker==2.254.1" \
"sagemaker>2.256,<3" \
sagemaker-experiments \
sagemaker-pytorch-training \
sagemaker-training \
Expand All @@ -305,7 +305,7 @@ RUN pip install --no-cache-dir -U \
seaborn \
shap \
cloudpickle \
"langcodes<=3.5.0"
langcodes

# Copy workaround script for incorrect hostname
COPY changehostname.c /
Expand Down
10 changes: 5 additions & 5 deletions pytorch/training/docker/2.8/py3/cu129/Dockerfile.gpu
Original file line number Diff line number Diff line change
Expand Up @@ -252,15 +252,15 @@ WORKDIR /
# Install SM packages
RUN pip install --no-cache-dir -U \
# Address package regression caused by the smclarify dependency s3fs
"awscli<1.42.50" \
"boto3<1.40.50" \
awscli \
boto3 \
smclarify \
"sagemaker==2.254.1" \
"sagemaker>2.256,<3" \
sagemaker-experiments \
sagemaker-pytorch-training \
sagemaker-training \
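# Add sniffio explicitly, as it was not included in the dependencies of sagemaker==2.254.1
# NOTE(review): confirm this is still needed whenever the sagemaker version constraint changes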
sniffio
sniffio

# Install extra packages
RUN pip install --no-cache-dir -U \
Expand All @@ -273,7 +273,7 @@ RUN pip install --no-cache-dir -U \
scikit-learn \
seaborn \
cloudpickle \
"langcodes<=3.5.0"
langcodes

COPY setup_oss_compliance.sh setup_oss_compliance.sh
RUN bash setup_oss_compliance.sh ${PYTHON} && rm setup_oss_compliance.sh
Expand Down