diff --git a/pytorch/training/buildspec-2-7-sm.yml b/pytorch/training/buildspec-2-7-sm.yml index e06db1978b35..8c04bf6cf7ee 100644 --- a/pytorch/training/buildspec-2-7-sm.yml +++ b/pytorch/training/buildspec-2-7-sm.yml @@ -50,7 +50,7 @@ images: tag_python_version: &TAG_PYTHON_VERSION py312 os_version: &OS_VERSION ubuntu22.04 tag: !join [ *VERSION, "-", *DEVICE_TYPE, "-", *TAG_PYTHON_VERSION, "-", *OS_VERSION, "-sagemaker" ] - latest_release_tag: !join [ *VERSION, "-", *DEVICE_TYPE, "-", *TAG_PYTHON_VERSION, "-", *OS_VERSION, "-sagemaker" ] + # latest_release_tag: !join [ *VERSION, "-", *DEVICE_TYPE, "-", *TAG_PYTHON_VERSION, "-", *OS_VERSION, "-sagemaker" ] # skip_build: "False" docker_file: !join [ docker/, *SHORT_VERSION, /, *DOCKER_PYTHON_VERSION, /Dockerfile., *DEVICE_TYPE ] target: sagemaker @@ -66,7 +66,7 @@ images: cuda_version: &CUDA_VERSION cu128 os_version: &OS_VERSION ubuntu22.04 tag: !join [ *VERSION, "-", *DEVICE_TYPE, "-", *TAG_PYTHON_VERSION, "-", *CUDA_VERSION, "-", *OS_VERSION, "-sagemaker" ] - latest_release_tag: !join [ *VERSION, "-", *DEVICE_TYPE, "-", *TAG_PYTHON_VERSION, "-", *CUDA_VERSION, "-", *OS_VERSION, "-sagemaker" ] + # latest_release_tag: !join [ *VERSION, "-", *DEVICE_TYPE, "-", *TAG_PYTHON_VERSION, "-", *CUDA_VERSION, "-", *OS_VERSION, "-sagemaker" ] # skip_build: "False" docker_file: !join [ docker/, *SHORT_VERSION, /, *DOCKER_PYTHON_VERSION, /, *CUDA_VERSION, /Dockerfile., *DEVICE_TYPE ] diff --git a/pytorch/training/buildspec-2-8-sm.yml b/pytorch/training/buildspec-2-8-sm.yml index ecbd248c3648..7aa9be2588a9 100644 --- a/pytorch/training/buildspec-2-8-sm.yml +++ b/pytorch/training/buildspec-2-8-sm.yml @@ -50,7 +50,7 @@ images: tag_python_version: &TAG_PYTHON_VERSION py312 os_version: &OS_VERSION ubuntu22.04 tag: !join [ *VERSION, "-", *DEVICE_TYPE, "-", *TAG_PYTHON_VERSION, "-", *OS_VERSION, "-sagemaker" ] - latest_release_tag: !join [ *VERSION, "-", *DEVICE_TYPE, "-", *TAG_PYTHON_VERSION, "-", *OS_VERSION, "-sagemaker" ] + # latest_release_tag: !join [ *VERSION, "-", *DEVICE_TYPE, "-", *TAG_PYTHON_VERSION, "-", *OS_VERSION, "-sagemaker" ] # skip_build: "False" docker_file: !join [ docker/, *SHORT_VERSION, /, *DOCKER_PYTHON_VERSION, /Dockerfile., *DEVICE_TYPE ] target: sagemaker @@ -66,7 +66,7 @@ images: cuda_version: &CUDA_VERSION cu129 os_version: &OS_VERSION ubuntu22.04 tag: !join [ *VERSION, "-", *DEVICE_TYPE, "-", *TAG_PYTHON_VERSION, "-", *CUDA_VERSION, "-", *OS_VERSION, "-sagemaker" ] - latest_release_tag: !join [ *VERSION, "-", *DEVICE_TYPE, "-", *TAG_PYTHON_VERSION, "-", *CUDA_VERSION, "-", *OS_VERSION, "-sagemaker" ] + # latest_release_tag: !join [ *VERSION, "-", *DEVICE_TYPE, "-", *TAG_PYTHON_VERSION, "-", *CUDA_VERSION, "-", *OS_VERSION, "-sagemaker" ] # skip_build: "False" docker_file: !join [ docker/, *SHORT_VERSION, /, *DOCKER_PYTHON_VERSION, /, *CUDA_VERSION, /Dockerfile., *DEVICE_TYPE ] diff --git a/pytorch/training/docker/2.7/py3/Dockerfile.cpu b/pytorch/training/docker/2.7/py3/Dockerfile.cpu index e94ae0b2adf3..4924eebc85d1 100644 --- a/pytorch/training/docker/2.7/py3/Dockerfile.cpu +++ b/pytorch/training/docker/2.7/py3/Dockerfile.cpu @@ -251,7 +251,7 @@ RUN pip install --no-cache-dir -U torch==${PYTORCH_VERSION} \ #thinc 8.3.6 is not compatible with numpy 1.26.4 (sagemaker doesn't support latest numpy) thinc==8.3.4 \ blis \ - typer \ + typer \ langcodes \ language_data \ numpy \ @@ -325,7 +325,7 @@ RUN pip install --no-cache-dir -U torch==${PYTORCH_VERSION} \ thinc==8.3.4 \ blis \ numpy \ - typer \ + typer \ langcodes \ language_data \ && pip uninstall -y dataclasses @@ -333,8 +333,8 @@ RUN pip install --no-cache-dir -U torch==${PYTORCH_VERSION} \ # Install SM packages RUN pip install --no-cache-dir -U \ # address package regression caused by smclarify depedency s3fs" - "awscli<=1.42.61" \ - "boto3<=1.40.61" \ + awscli \ + boto3 \ smclarify \ "sagemaker>=2.9.0,<3" \ "sagemaker-experiments<1" \ diff --git a/pytorch/training/docker/2.7/py3/cu128/Dockerfile.gpu b/pytorch/training/docker/2.7/py3/cu128/Dockerfile.gpu index 1969a9f3afb8..d4545d947e53 100644 --- a/pytorch/training/docker/2.7/py3/cu128/Dockerfile.gpu +++ b/pytorch/training/docker/2.7/py3/cu128/Dockerfile.gpu @@ -71,7 +71,7 @@ RUN pip install --no-cache-dir -U torch==${PYTORCH_VERSION} \ --index-url https://download.pytorch.org/whl/cu128 \ && pip install --no-cache-dir -U torchtnt==${TORCHTNT_VERSION} \ torchdata==${TORCHDATA_VERSION} \ - triton + triton # Install GDRCopy which is a dependency of SM Distributed DataParallel binary # The test binaries requires cuda driver library which could be found in conda @@ -126,7 +126,7 @@ RUN pip install --no-cache-dir \ psutil \ ipython \ ipykernel \ - "pillow>=11.3.0" \ + "pillow>=12.1.1" \ h5py \ "fsspec>=2025.7.0" \ "markupsafe>=3.0.2" \ @@ -152,7 +152,7 @@ RUN pip install --no-cache-dir \ #thinc 8.3.6 is not compatible with numpy 1.26.4 (sagemaker doesn't support latest numpy) "thinc==8.3.4" \ blis \ - typer \ + typer \ langcodes \ language_data \ "jinja2>=3.1.6"\ @@ -204,8 +204,8 @@ RUN chmod +x /usr/local/bin/start_with_right_hostname.sh # Install SM packages RUN pip install --no-cache-dir -U \ # address package regression caused by smclarify depedency s3fs" - "awscli<=1.42.61" \ - "boto3<=1.40.61" \ + awscli \ + boto3 \ smclarify \ "sagemaker>=2.9.0,<3" \ "sagemaker-experiments<1" \ diff --git a/pytorch/training/docker/2.8/py3/Dockerfile.cpu b/pytorch/training/docker/2.8/py3/Dockerfile.cpu index 19c22cc2b82a..74e00704271c 100644 --- a/pytorch/training/docker/2.8/py3/Dockerfile.cpu +++ b/pytorch/training/docker/2.8/py3/Dockerfile.cpu @@ -287,7 +287,7 @@ WORKDIR / # Install SM packages RUN pip install --no-cache-dir -U \ smclarify \ - "sagemaker==2.254.1" \ + "sagemaker>2.256,<3" \ sagemaker-experiments \ sagemaker-pytorch-training \ sagemaker-training \ @@ -305,7 +305,7 @@ RUN pip install --no-cache-dir -U \ seaborn \ shap \ cloudpickle \ - "langcodes<=3.5.0" + langcodes # Copy workaround script for incorrect hostname COPY changehostname.c / diff --git a/pytorch/training/docker/2.8/py3/cu129/Dockerfile.gpu b/pytorch/training/docker/2.8/py3/cu129/Dockerfile.gpu index 268d8c08401c..8fa8b4bc7a0d 100644 --- a/pytorch/training/docker/2.8/py3/cu129/Dockerfile.gpu +++ b/pytorch/training/docker/2.8/py3/cu129/Dockerfile.gpu @@ -252,15 +252,15 @@ WORKDIR / # Install SM packages RUN pip install --no-cache-dir -U \ # address package regression caused by smclarify depedency s3fs" - "awscli<1.42.50" \ - "boto3<1.40.50" \ + awscli \ + boto3 \ smclarify \ - "sagemaker==2.254.1" \ + "sagemaker>2.256,<3" \ sagemaker-experiments \ sagemaker-pytorch-training \ sagemaker-training \ # Add sniffio explicitly as it's not included in sagemaker==2.254.1 dependencies - sniffio + sniffio # Install extra packages RUN pip install --no-cache-dir -U \ @@ -273,7 +273,7 @@ RUN pip install --no-cache-dir -U \ scikit-learn \ seaborn \ cloudpickle \ - "langcodes<=3.5.0" + langcodes COPY setup_oss_compliance.sh setup_oss_compliance.sh RUN bash setup_oss_compliance.sh ${PYTHON} && rm setup_oss_compliance.sh