From 282f49cabf5158a540f7e30d52535a9d6f468395 Mon Sep 17 00:00:00 2001 From: jkottu Date: Tue, 23 Sep 2025 10:39:12 -0700 Subject: [PATCH 01/49] test python upgrade --- dlc_developer_config.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dlc_developer_config.toml b/dlc_developer_config.toml index bce20ebd1eb7..f08b849ecdf6 100644 --- a/dlc_developer_config.toml +++ b/dlc_developer_config.toml @@ -37,7 +37,7 @@ deep_canary_mode = false [build] # Add in frameworks you would like to build. By default, builds are disabled unless you specify building an image. # available frameworks - ["base", "vllm", "autogluon", "huggingface_tensorflow", "huggingface_pytorch", "huggingface_tensorflow_trcomp", "huggingface_pytorch_trcomp", "pytorch_trcomp", "tensorflow", "pytorch", "stabilityai_pytorch"] -build_frameworks = [] +build_frameworks = ["vllm"] # By default we build both training and inference containers. Set true/false values to determine which to build. @@ -182,4 +182,4 @@ dlc-pr-pytorch-eia-inference = "" dlc-pr-tensorflow-2-eia-inference = "" # vllm -dlc-pr-vllm = "" \ No newline at end of file +dlc-pr-vllm = "vllm/buildspec.yml" \ No newline at end of file From 596f0dd26c9e46277305bb8e3432a30bfb4445eb Mon Sep 17 00:00:00 2001 From: jkottu Date: Tue, 23 Sep 2025 12:05:28 -0700 Subject: [PATCH 02/49] downgrade invoke --- src/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/requirements.txt b/src/requirements.txt index 048b1148450c..d0885dbf5035 100644 --- a/src/requirements.txt +++ b/src/requirements.txt @@ -1,7 +1,7 @@ awscli==1.38.9 boto3==1.37.9 fabric -invoke +invoke<2 wheel==0.44.0 docker==6.1.3 pyfiglet==0.8.post1 From d27792f97bbdf12c5d1487fe7c3dc285c8e67f01 Mon Sep 17 00:00:00 2001 From: jkottu Date: Tue, 23 Sep 2025 12:35:18 -0700 Subject: [PATCH 03/49] pin version --- src/requirements.txt | 2 +- src/utils.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/requirements.txt b/src/requirements.txt index d0885dbf5035..e5d9738e1641 100644 --- a/src/requirements.txt +++ b/src/requirements.txt @@ -1,7 +1,7 @@ awscli==1.38.9 boto3==1.37.9 fabric -invoke<2 +invoke==2.2.0 wheel==0.44.0 docker==6.1.3 pyfiglet==0.8.post1 diff --git a/src/utils.py b/src/utils.py index 38285ade2577..18e6fadb8d55 100644 --- a/src/utils.py +++ b/src/utils.py @@ -22,7 +22,7 @@ import constants from botocore.exceptions import ClientError -from invoke.context import Context +from invoke import Context from codebuild_environment import get_cloned_folder_path from config import is_build_enabled, is_autopatch_build_enabled From 06f4ab34865a709e8224e4e1443025530bbcdc65 Mon Sep 17 00:00:00 2001 From: jkottu Date: Tue, 23 Sep 2025 13:09:51 -0700 Subject: [PATCH 04/49] upgrade docker to newest version --- src/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/requirements.txt b/src/requirements.txt index e5d9738e1641..12da3c72e880 100644 --- a/src/requirements.txt +++ b/src/requirements.txt @@ -3,7 +3,7 @@ boto3==1.37.9 fabric invoke==2.2.0 wheel==0.44.0 -docker==6.1.3 +docker pyfiglet==0.8.post1 reprint==0.5.2 ruamel.yaml==0.18.10 From 94bf539e82a15300f6108616b0a84d57b039bd25 Mon Sep 17 00:00:00 2001 From: jkottu Date: Tue, 23 Sep 2025 13:39:09 -0700 Subject: [PATCH 05/49] build pytorch 2.8 image with upgraded python --- dlc_developer_config.toml | 6 +++--- test/dlc_tests/conftest.py | 3 +-- test/test_utils/__init__.py | 2 +- test/vllm/ec2/test_artifacts/test_ec2.py | 10 +--------- 4 files changed, 6 insertions(+), 15 deletions(-) diff --git a/dlc_developer_config.toml b/dlc_developer_config.toml index f08b849ecdf6..7f59b9f0344e 100644 --- a/dlc_developer_config.toml +++ b/dlc_developer_config.toml @@ -37,7 +37,7 @@ deep_canary_mode = false [build] # Add in frameworks you would like to build. By default, builds are disabled unless you specify building an image. # available frameworks - ["base", "vllm", "autogluon", "huggingface_tensorflow", "huggingface_pytorch", "huggingface_tensorflow_trcomp", "huggingface_pytorch_trcomp", "pytorch_trcomp", "tensorflow", "pytorch", "stabilityai_pytorch"] -build_frameworks = ["vllm"] +build_frameworks = ["pytorch"] # By default we build both training and inference containers. Set true/false values to determine which to build. @@ -122,7 +122,7 @@ use_scheduler = false dlc-pr-base = "" # Standard Framework Training -dlc-pr-pytorch-training = "" +dlc-pr-pytorch-training = "pytorch/training/buildspec-2-8-sm.yml" dlc-pr-tensorflow-2-training = "" dlc-pr-autogluon-training = "" @@ -182,4 +182,4 @@ dlc-pr-pytorch-eia-inference = "" dlc-pr-tensorflow-2-eia-inference = "" # vllm -dlc-pr-vllm = "vllm/buildspec.yml" \ No newline at end of file +dlc-pr-vllm = "" \ No newline at end of file diff --git a/test/dlc_tests/conftest.py b/test/dlc_tests/conftest.py index dc6fc2ea624e..85d699b4bfc6 100644 --- a/test/dlc_tests/conftest.py +++ b/test/dlc_tests/conftest.py @@ -804,12 +804,11 @@ def delete_s3_artifact_copy(): request.addfinalizer(delete_s3_artifact_copy) - python_version = "3.9" if is_neuron_image(request.fixturenames): # neuron still support tf1.15 and that is only there in py37 and less. # so use python3.7 for neuron python_version = "3.7" - ec2_utils.install_python_in_instance(conn, python_version=python_version) + ec2_utils.install_python_in_instance(conn, python_version=python_version) conn.run( f"aws s3 cp --recursive {test_utils.TEST_TRANSFER_S3_BUCKET}/{artifact_folder} $HOME/container_tests" diff --git a/test/test_utils/__init__.py b/test/test_utils/__init__.py index 18842c9ed8dd..22b9bd3cbf04 100644 --- a/test/test_utils/__init__.py +++ b/test/test_utils/__init__.py @@ -1699,7 +1699,7 @@ def setup_sm_benchmark_tf_train_env(resources_location, setup_tf1_env, setup_tf2 ).stdout.strip("\n") system = ctx.run("uname -s").stdout.strip("\n") sed_input_arg = "'' " if system == "Darwin" else "" - ctx.run(f"sed -i {sed_input_arg}'s/\[2, 1, 0\]/\[2, 1, 1\]/g' {estimator_location}") + ctx.run(f"sed -i {sed_input_arg}'s/\\[2, 1, 0\\]/\\[2, 1, 1\\]/g' {estimator_location}") return venv_dir diff --git a/test/vllm/ec2/test_artifacts/test_ec2.py b/test/vllm/ec2/test_artifacts/test_ec2.py index 91dd079f3f05..1f27c88aecaf 100644 --- a/test/vllm/ec2/test_artifacts/test_ec2.py +++ b/test/vllm/ec2/test_artifacts/test_ec2.py @@ -8,12 +8,7 @@ from contextlib import contextmanager from typing import Optional, Tuple -from test.test_utils.ec2 import ( - get_account_id_from_image_uri, - login_to_ecr_registry, - get_ec2_client, - install_python_in_instance, -) +from test.test_utils.ec2 import get_account_id_from_image_uri, login_to_ecr_registry, get_ec2_client from test.vllm.ec2.utils.fsx_utils import FsxSetup from test.vllm.ec2.infra.setup_ec2 import cleanup_resources, TEST_ID @@ -122,7 +117,6 @@ def test_vllm_benchmark_on_multi_node(head_connection, worker_connection, image_ raise Exception("Failed to get HF token") for conn in [head_connection, worker_connection]: - install_python_in_instance(conn, "3.10") setup_docker_image(conn, image_uri) setup_env(conn) @@ -257,8 +251,6 @@ def run_single_node_test(head_conn, image_uri): raise Exception(f"GPU setup verification failed for head node") try: - install_python_in_instance(head_conn, python_version="3.10") - response = get_secret_hf_token() hf_token = response.get("HF_TOKEN") From 9c9c62a36dbab1dd05f155b1993d531d2abf0530 Mon Sep 17 00:00:00 2001 From: jkottu Date: Tue, 23 Sep 2025 16:20:47 -0700 Subject: [PATCH 06/49] arm64 python upgrade test --- dlc_developer_config.toml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/dlc_developer_config.toml b/dlc_developer_config.toml index 7f59b9f0344e..f8838cceac85 100644 --- a/dlc_developer_config.toml +++ b/dlc_developer_config.toml @@ -15,7 +15,7 @@ neuronx_mode = false graviton_mode = false # Please only set it to true if you are preparing a ARM64 related PR # Do remember to revert it back to false before merging any PR (including ARM64 dedicated PR) -arm64_mode = false +arm64_mode = true # Please only set it to True if you are preparing a HABANA related PR # Do remember to revert it back to False before merging any PR (including HABANA dedicated PR) habana_mode = false @@ -37,7 +37,7 @@ deep_canary_mode = false [build] # Add in frameworks you would like to build. By default, builds are disabled unless you specify building an image. # available frameworks - ["base", "vllm", "autogluon", "huggingface_tensorflow", "huggingface_pytorch", "huggingface_tensorflow_trcomp", "huggingface_pytorch_trcomp", "pytorch_trcomp", "tensorflow", "pytorch", "stabilityai_pytorch"] -build_frameworks = ["pytorch"] +build_frameworks = ["vllm"] # By default we build both training and inference containers. Set true/false values to determine which to build. @@ -122,7 +122,7 @@ use_scheduler = false dlc-pr-base = "" # Standard Framework Training -dlc-pr-pytorch-training = "pytorch/training/buildspec-2-8-sm.yml" +dlc-pr-pytorch-training = "" dlc-pr-tensorflow-2-training = "" dlc-pr-autogluon-training = "" @@ -182,4 +182,4 @@ dlc-pr-pytorch-eia-inference = "" dlc-pr-tensorflow-2-eia-inference = "" # vllm -dlc-pr-vllm = "" \ No newline at end of file +dlc-pr-vllm = "vllm/buildspec-arm64.yml" \ No newline at end of file From c8ed0501142d67c9682dfb3448db5b6e7b2937f2 Mon Sep 17 00:00:00 2001 From: jkottu Date: Tue, 23 Sep 2025 21:18:54 -0700 Subject: [PATCH 07/49] test autopatch x86 --- dlc_developer_config.toml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/dlc_developer_config.toml b/dlc_developer_config.toml index f8838cceac85..3ae3e8374e8a 100644 --- a/dlc_developer_config.toml +++ b/dlc_developer_config.toml @@ -15,7 +15,7 @@ neuronx_mode = false graviton_mode = false # Please only set it to true if you are preparing a ARM64 related PR # Do remember to revert it back to false before merging any PR (including ARM64 dedicated PR) -arm64_mode = true +arm64_mode = false # Please only set it to True if you are preparing a HABANA related PR # Do remember to revert it back to False before merging any PR (including HABANA dedicated PR) habana_mode = false @@ -37,7 +37,7 @@ deep_canary_mode = false [build] # Add in frameworks you would like to build. By default, builds are disabled unless you specify building an image. # available frameworks - ["base", "vllm", "autogluon", "huggingface_tensorflow", "huggingface_pytorch", "huggingface_tensorflow_trcomp", "huggingface_pytorch_trcomp", "pytorch_trcomp", "tensorflow", "pytorch", "stabilityai_pytorch"] -build_frameworks = ["vllm"] +build_frameworks = ["tensorflow"] # By default we build both training and inference containers. Set true/false values to determine which to build. @@ -123,7 +123,7 @@ dlc-pr-base = "" # Standard Framework Training dlc-pr-pytorch-training = "" -dlc-pr-tensorflow-2-training = "" +dlc-pr-tensorflow-2-training = "tensorflow/training/buildspec-2-18-ec2.yml" dlc-pr-autogluon-training = "" # ARM64 Training @@ -182,4 +182,4 @@ dlc-pr-pytorch-eia-inference = "" dlc-pr-tensorflow-2-eia-inference = "" # vllm -dlc-pr-vllm = "vllm/buildspec-arm64.yml" \ No newline at end of file +dlc-pr-vllm = "" \ No newline at end of file From d73cbbedf88ce9f7d075420a473449b3b313fd05 Mon Sep 17 00:00:00 2001 From: jkottu Date: Tue, 23 Sep 2025 21:56:21 -0700 Subject: [PATCH 08/49] tensorflow 2.19 rc error with new image --- dlc_developer_config.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dlc_developer_config.toml b/dlc_developer_config.toml index 3ae3e8374e8a..0e280f3404a0 100644 --- a/dlc_developer_config.toml +++ b/dlc_developer_config.toml @@ -123,7 +123,7 @@ dlc-pr-base = "" # Standard Framework Training dlc-pr-pytorch-training = "" -dlc-pr-tensorflow-2-training = "tensorflow/training/buildspec-2-18-ec2.yml" +dlc-pr-tensorflow-2-training = "tensorflow/training/buildspec-2-19-sm.yml" dlc-pr-autogluon-training = "" # ARM64 Training From 7df0518bd208969fd0d5b4a39cef0c3923f2c664 Mon Sep 17 00:00:00 2001 From: jkottu Date: Tue, 23 Sep 2025 22:05:35 -0700 Subject: [PATCH 09/49] inference tf 2.19 sm - python upgrade --- dlc_developer_config.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dlc_developer_config.toml b/dlc_developer_config.toml index 0e280f3404a0..d4f4b9fac7ad 100644 --- a/dlc_developer_config.toml +++ b/dlc_developer_config.toml @@ -123,7 +123,7 @@ dlc-pr-base = "" # Standard Framework Training dlc-pr-pytorch-training = "" -dlc-pr-tensorflow-2-training = "tensorflow/training/buildspec-2-19-sm.yml" +dlc-pr-tensorflow-2-training = "" dlc-pr-autogluon-training = "" # ARM64 Training @@ -153,7 +153,7 @@ dlc-pr-tensorflow-2-habana-training = "" # Standard Framework Inference dlc-pr-pytorch-inference = "" -dlc-pr-tensorflow-2-inference = "" +dlc-pr-tensorflow-2-inference = "tensorflow/inference/buildspec-2-19-sm.yml" dlc-pr-autogluon-inference = "" # Graviton Inference From 3d6e032e499d4f8ee00d7d405de4c92e7d65e838 Mon Sep 17 00:00:00 2001 From: jkottu Date: Wed, 24 Sep 2025 09:50:44 -0700 Subject: [PATCH 10/49] test pytorch arm64 2.5 sm inference --- dlc_developer_config.toml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/dlc_developer_config.toml b/dlc_developer_config.toml index d4f4b9fac7ad..f50ba0dde953 100644 --- a/dlc_developer_config.toml +++ b/dlc_developer_config.toml @@ -15,7 +15,7 @@ neuronx_mode = false graviton_mode = false # Please only set it to true if you are preparing a ARM64 related PR # Do remember to revert it back to false before merging any PR (including ARM64 dedicated PR) -arm64_mode = false +arm64_mode = true # Please only set it to True if you are preparing a HABANA related PR # Do remember to revert it back to False before merging any PR (including HABANA dedicated PR) habana_mode = false @@ -37,11 +37,11 @@ deep_canary_mode = false [build] # Add in frameworks you would like to build. By default, builds are disabled unless you specify building an image. # available frameworks - ["base", "vllm", "autogluon", "huggingface_tensorflow", "huggingface_pytorch", "huggingface_tensorflow_trcomp", "huggingface_pytorch_trcomp", "pytorch_trcomp", "tensorflow", "pytorch", "stabilityai_pytorch"] -build_frameworks = ["tensorflow"] +build_frameworks = ["pytorch"] # By default we build both training and inference containers. Set true/false values to determine which to build. -build_training = true +build_training = false build_inference = true # Set do_build to "false" to skip builds and test the latest image built by this PR @@ -152,8 +152,8 @@ dlc-pr-tensorflow-2-habana-training = "" ### INFERENCE PR JOBS ### # Standard Framework Inference -dlc-pr-pytorch-inference = "" -dlc-pr-tensorflow-2-inference = "tensorflow/inference/buildspec-2-19-sm.yml" +dlc-pr-pytorch-inference = "pytorch/inference/buildspec-arm64-2-5-sm.yml" +dlc-pr-tensorflow-2-inference = "" dlc-pr-autogluon-inference = "" # Graviton Inference From 526c1f47e3b568a85a2045abd7fb9aee00d633a0 Mon Sep 17 00:00:00 2001 From: jkottu Date: Wed, 24 Sep 2025 09:59:17 -0700 Subject: [PATCH 11/49] test pytorch arm64 2.6 sm inference --- dlc_developer_config.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dlc_developer_config.toml b/dlc_developer_config.toml index f50ba0dde953..c80b85e80e8f 100644 --- a/dlc_developer_config.toml +++ b/dlc_developer_config.toml @@ -152,7 +152,7 @@ dlc-pr-tensorflow-2-habana-training = "" ### INFERENCE PR JOBS ### # Standard Framework Inference -dlc-pr-pytorch-inference = "pytorch/inference/buildspec-arm64-2-5-sm.yml" +dlc-pr-pytorch-inference = "pytorch/inference/buildspec-arm64-2-6-sm.yml" dlc-pr-tensorflow-2-inference = "" dlc-pr-autogluon-inference = "" From 803dbaca79612151efe9702044f72098e9b40d55 Mon Sep 17 00:00:00 2001 From: jkottu Date: Wed, 24 Sep 2025 11:40:49 -0700 Subject: [PATCH 12/49] correct python command --- .../mxnet/training/test_performance_mxnet_sm_training.py | 2 +- .../ec2/tensorflow/inference/test_tensorflow_inference.py | 2 +- .../ec2/tensorflow/training/test_tensorflow_training.py | 8 ++++---- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/test/dlc_tests/benchmark/sagemaker/mxnet/training/test_performance_mxnet_sm_training.py b/test/dlc_tests/benchmark/sagemaker/mxnet/training/test_performance_mxnet_sm_training.py index ae84728a3f32..767f1cbfd338 100644 --- a/test/dlc_tests/benchmark/sagemaker/mxnet/training/test_performance_mxnet_sm_training.py +++ b/test/dlc_tests/benchmark/sagemaker/mxnet/training/test_performance_mxnet_sm_training.py @@ -71,7 +71,7 @@ def test_mxnet_sagemaker_training_performance( with ctx.cd(test_dir), ctx.prefix(f"source {venv_dir}/bin/activate"): log_file = f"results-{commit_info}-{time_str}-{num_nodes}-node.txt" run_out = ctx.run( - f"timeout 90m python mx_sm_benchmark.py " + f"timeout 90m python3 mx_sm_benchmark.py " f"--framework-version {framework_version} " f"--image-uri {mxnet_training} " f"--instance-type ml.{ec2_instance_type} " diff --git a/test/dlc_tests/ec2/tensorflow/inference/test_tensorflow_inference.py b/test/dlc_tests/ec2/tensorflow/inference/test_tensorflow_inference.py index 0fc3fab46be2..65018c591ccd 100644 --- a/test/dlc_tests/ec2/tensorflow/inference/test_tensorflow_inference.py +++ b/test/dlc_tests/ec2/tensorflow/inference/test_tensorflow_inference.py @@ -483,7 +483,7 @@ def run_ec2_tensorflow_inference( def train_mnist_model(serving_folder_path, ec2_connection): ec2_connection.run(f"cd {serving_folder_path}") mnist_script_path = f"{serving_folder_path}/tensorflow_serving/example/mnist_saved_model.py" - ec2_connection.run(f"python {mnist_script_path} {serving_folder_path}/models/mnist", hide=True) + ec2_connection.run(f"python3 {mnist_script_path} {serving_folder_path}/models/mnist", hide=True) def host_setup_for_tensorflow_inference( diff --git a/test/dlc_tests/ec2/tensorflow/training/test_tensorflow_training.py b/test/dlc_tests/ec2/tensorflow/training/test_tensorflow_training.py index 5bc807f2436a..f81ce4a6e496 100644 --- a/test/dlc_tests/ec2/tensorflow/training/test_tensorflow_training.py +++ b/test/dlc_tests/ec2/tensorflow/training/test_tensorflow_training.py @@ -480,12 +480,12 @@ def test_tensorflow_io_s3_plugin_cpu( # Helper function to test data service def run_data_service_test(ec2_connection, tensorflow_training, cmd): _, tensorflow_version = test_utils.get_framework_and_version_from_tag(tensorflow_training) - ec2_connection.run(f"python -m pip install --upgrade pip") - ec2_connection.run(f"python -m pip install tensorflow=={tensorflow_version}") - ec2_connection.run(f"python -m pip install 'protobuf<4'") + ec2_connection.run(f"python3 -m pip install --upgrade pip") + ec2_connection.run(f"python3 -m pip install tensorflow=={tensorflow_version}") + ec2_connection.run(f"python3 -m pip install 'protobuf<4'") container_test_local_dir = os.path.join("$HOME", "container_tests") ec2_connection.run( - f"cd {container_test_local_dir}/bin && screen -d -m python start_dataservice.py" + f"cd {container_test_local_dir}/bin && screen -d -m python3 start_dataservice.py" ) execute_ec2_training_test(ec2_connection, tensorflow_training, cmd, host_network=True) From bbbed99a3cec01600c4de3ebf01f64465bd08865 Mon Sep 17 00:00:00 2001 From: jkottu Date: Thu, 25 Sep 2025 10:08:57 -0700 Subject: [PATCH 13/49] add invoke back --- src/requirements.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/requirements.txt b/src/requirements.txt index 12da3c72e880..048b1148450c 100644 --- a/src/requirements.txt +++ b/src/requirements.txt @@ -1,9 +1,9 @@ awscli==1.38.9 boto3==1.37.9 fabric -invoke==2.2.0 +invoke wheel==0.44.0 -docker +docker==6.1.3 pyfiglet==0.8.post1 reprint==0.5.2 ruamel.yaml==0.18.10 From 255ffab817ca8d412b749c03a3a9405f8ca14b25 Mon Sep 17 00:00:00 2001 From: jkottu Date: Thu, 25 Sep 2025 10:12:23 -0700 Subject: [PATCH 14/49] test sm local --- dlc_developer_config.toml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/dlc_developer_config.toml b/dlc_developer_config.toml index c80b85e80e8f..b99060beec68 100644 --- a/dlc_developer_config.toml +++ b/dlc_developer_config.toml @@ -15,7 +15,7 @@ neuronx_mode = false graviton_mode = false # Please only set it to true if you are preparing a ARM64 related PR # Do remember to revert it back to false before merging any PR (including ARM64 dedicated PR) -arm64_mode = true +arm64_mode = false # Please only set it to True if you are preparing a HABANA related PR # Do remember to revert it back to False before merging any PR (including HABANA dedicated PR) habana_mode = false @@ -37,12 +37,12 @@ deep_canary_mode = false [build] # Add in frameworks you would like to build. By default, builds are disabled unless you specify building an image. # available frameworks - ["base", "vllm", "autogluon", "huggingface_tensorflow", "huggingface_pytorch", "huggingface_tensorflow_trcomp", "huggingface_pytorch_trcomp", "pytorch_trcomp", "tensorflow", "pytorch", "stabilityai_pytorch"] -build_frameworks = ["pytorch"] +build_frameworks = ["tensorflow"] # By default we build both training and inference containers. Set true/false values to determine which to build. -build_training = false -build_inference = true +build_training = true +build_inference = false # Set do_build to "false" to skip builds and test the latest image built by this PR # Note: at least one build is required to set do_build to "false" @@ -123,7 +123,7 @@ dlc-pr-base = "" # Standard Framework Training dlc-pr-pytorch-training = "" -dlc-pr-tensorflow-2-training = "" +dlc-pr-tensorflow-2-training = "tensorflow/training/buildspec-2-19-sm.yml" dlc-pr-autogluon-training = "" # ARM64 Training @@ -152,7 +152,7 @@ dlc-pr-tensorflow-2-habana-training = "" ### INFERENCE PR JOBS ### # Standard Framework Inference -dlc-pr-pytorch-inference = "pytorch/inference/buildspec-arm64-2-6-sm.yml" +dlc-pr-pytorch-inference = "" dlc-pr-tensorflow-2-inference = "" dlc-pr-autogluon-inference = "" From c22454b74f0affb34344566d3fc09053c717fc5d Mon Sep 17 00:00:00 2001 From: jkottu Date: Thu, 25 Sep 2025 10:36:56 -0700 Subject: [PATCH 15/49] tensorflow arm64 2.19 inference --- dlc_developer_config.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dlc_developer_config.toml b/dlc_developer_config.toml index b99060beec68..746734eadbd1 100644 --- a/dlc_developer_config.toml +++ b/dlc_developer_config.toml @@ -15,7 +15,7 @@ neuronx_mode = false graviton_mode = false # Please only set it to true if you are preparing a ARM64 related PR # Do remember to revert it back to false before merging any PR (including ARM64 dedicated PR) -arm64_mode = false +arm64_mode = true # Please only set it to True if you are preparing a HABANA related PR # Do remember to revert it back to False before merging any PR (including HABANA dedicated PR) habana_mode = false @@ -123,7 +123,7 @@ dlc-pr-base = "" # Standard Framework Training dlc-pr-pytorch-training = "" -dlc-pr-tensorflow-2-training = "tensorflow/training/buildspec-2-19-sm.yml" +dlc-pr-tensorflow-2-training = "tensorflow/inference/buildspec-arm64-2-19-sm.yml" dlc-pr-autogluon-training = "" # ARM64 Training From c099ca06dfeea381c44a2f09b7b2e4284b26b6d7 Mon Sep 17 00:00:00 2001 From: jkottu Date: Thu, 25 Sep 2025 10:52:35 -0700 Subject: [PATCH 16/49] test tf 2.19 sm training build --- dlc_developer_config.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dlc_developer_config.toml b/dlc_developer_config.toml index 746734eadbd1..b99060beec68 100644 --- a/dlc_developer_config.toml +++ b/dlc_developer_config.toml @@ -15,7 +15,7 @@ neuronx_mode = false graviton_mode = false # Please only set it to true if you are preparing a ARM64 related PR # Do remember to revert it back to false before merging any PR (including ARM64 dedicated PR) -arm64_mode = true +arm64_mode = false # Please only set it to True if you are preparing a HABANA related PR # Do remember to revert it back to False before merging any PR (including HABANA dedicated PR) habana_mode = false @@ -123,7 +123,7 @@ dlc-pr-base = "" # Standard Framework Training dlc-pr-pytorch-training = "" -dlc-pr-tensorflow-2-training = "tensorflow/inference/buildspec-arm64-2-19-sm.yml" +dlc-pr-tensorflow-2-training = "tensorflow/training/buildspec-2-19-sm.yml" dlc-pr-autogluon-training = "" # ARM64 Training From c1c8940f08d0ec860ba7582c0d294ce82fd48c7b Mon Sep 17 00:00:00 2001 From: jkottu Date: Thu, 25 Sep 2025 10:54:55 -0700 Subject: [PATCH 17/49] test tf 2.19 sm arm64 inference build --- dlc_developer_config.toml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/dlc_developer_config.toml b/dlc_developer_config.toml index b99060beec68..f720acb3af9e 100644 --- a/dlc_developer_config.toml +++ b/dlc_developer_config.toml @@ -15,7 +15,7 @@ neuronx_mode = false graviton_mode = false # Please only set it to true if you are preparing a ARM64 related PR # Do remember to revert it back to false before merging any PR (including ARM64 dedicated PR) -arm64_mode = false +arm64_mode = true # Please only set it to True if you are preparing a HABANA related PR # Do remember to revert it back to False before merging any PR (including HABANA dedicated PR) habana_mode = false @@ -41,8 +41,8 @@ build_frameworks = ["tensorflow"] # By default we build both training and inference containers. Set true/false values to determine which to build. -build_training = true -build_inference = false +build_training = false +build_inference = true # Set do_build to "false" to skip builds and test the latest image built by this PR # Note: at least one build is required to set do_build to "false" @@ -123,7 +123,7 @@ dlc-pr-base = "" # Standard Framework Training dlc-pr-pytorch-training = "" -dlc-pr-tensorflow-2-training = "tensorflow/training/buildspec-2-19-sm.yml" +dlc-pr-tensorflow-2-training = "tensorflow/inference/buildspec-arm64-2-19-sm.yml" dlc-pr-autogluon-training = "" # ARM64 Training From 28ccfb1e7fc62eac2071cb95ab8be7060d87e370 Mon Sep 17 00:00:00 2001 From: jkottu Date: Thu, 25 Sep 2025 11:02:40 -0700 Subject: [PATCH 18/49] test tf 2.19 sm x86 training build --- dlc_developer_config.toml | 8 ++++---- src/requirements.txt | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/dlc_developer_config.toml b/dlc_developer_config.toml index f720acb3af9e..b99060beec68 100644 --- a/dlc_developer_config.toml +++ b/dlc_developer_config.toml @@ -15,7 +15,7 @@ neuronx_mode = false graviton_mode = false # Please only set it to true if you are preparing a ARM64 related PR # Do remember to revert it back to false before merging any PR (including ARM64 dedicated PR) -arm64_mode = true +arm64_mode = false # Please only set it to True if you are preparing a HABANA related PR # Do remember to revert it back to False before merging any PR (including HABANA dedicated PR) habana_mode = false @@ -41,8 +41,8 @@ build_frameworks = ["tensorflow"] # By default we build both training and inference containers. Set true/false values to determine which to build. -build_training = false -build_inference = true +build_training = true +build_inference = false # Set do_build to "false" to skip builds and test the latest image built by this PR # Note: at least one build is required to set do_build to "false" @@ -123,7 +123,7 @@ dlc-pr-base = "" # Standard Framework Training dlc-pr-pytorch-training = "" -dlc-pr-tensorflow-2-training = "tensorflow/inference/buildspec-arm64-2-19-sm.yml" +dlc-pr-tensorflow-2-training = "tensorflow/training/buildspec-2-19-sm.yml" dlc-pr-autogluon-training = "" # ARM64 Training diff --git a/src/requirements.txt b/src/requirements.txt index 048b1148450c..e4db76390f53 100644 --- a/src/requirements.txt +++ b/src/requirements.txt @@ -3,11 +3,11 @@ boto3==1.37.9 fabric invoke wheel==0.44.0 -docker==6.1.3 +docker pyfiglet==0.8.post1 reprint==0.5.2 ruamel.yaml==0.18.10 black==24.8.0 junit-xml==1.9 toml==0.10.2 -retrying +retrying \ No newline at end of file From 4f22abd3100f283e728a2c402761589b44085d5d Mon Sep 17 00:00:00 2001 From: jkottu Date: Thu, 25 Sep 2025 11:28:34 -0700 Subject: [PATCH 19/49] add lexicon and six? --- src/requirements.txt | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/requirements.txt b/src/requirements.txt index e4db76390f53..67cbac39da7c 100644 --- a/src/requirements.txt +++ b/src/requirements.txt @@ -10,4 +10,6 @@ ruamel.yaml==0.18.10 black==24.8.0 junit-xml==1.9 toml==0.10.2 -retrying \ No newline at end of file +retrying +lexicon +six \ No newline at end of file From ced266486bc8d9186bb0c7e6af4aef66e9ed854e Mon Sep 17 00:00:00 2001 From: jkottu Date: Thu, 25 Sep 2025 12:52:19 -0700 Subject: [PATCH 20/49] build tf 2.18 --- tensorflow/training/docker/2.18/py3/Dockerfile.cpu | 2 +- tensorflow/training/docker/2.18/py3/cu125/Dockerfile.gpu | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/tensorflow/training/docker/2.18/py3/Dockerfile.cpu b/tensorflow/training/docker/2.18/py3/Dockerfile.cpu index d9b5d9db780e..8dfdeb1f9a3e 100644 --- a/tensorflow/training/docker/2.18/py3/Dockerfile.cpu +++ b/tensorflow/training/docker/2.18/py3/Dockerfile.cpu @@ -264,7 +264,7 @@ RUN ${PIP} install --no-cache-dir -U \ "tensorflow-datasets==4.9.7" RUN $PYTHON -m pip install --no-cache-dir -U \ - numba==0.61.0 \ + numba \ bokeh \ imageio \ opencv-python \ diff --git a/tensorflow/training/docker/2.18/py3/cu125/Dockerfile.gpu b/tensorflow/training/docker/2.18/py3/cu125/Dockerfile.gpu index b36bf675d946..1d57bc1710e1 100644 --- a/tensorflow/training/docker/2.18/py3/cu125/Dockerfile.gpu +++ b/tensorflow/training/docker/2.18/py3/cu125/Dockerfile.gpu @@ -367,7 +367,7 @@ RUN ${PIP} install --no-cache-dir -U \ "tensorflow-datasets==4.9.7" RUN $PYTHON -m pip install --no-cache-dir -U \ - numba==0.61.0 \ + numba \ bokeh \ imageio \ opencv-python \ @@ -382,8 +382,9 @@ RUN $PYTHON -m pip install --no-cache-dir -U \ RUN $PYTHON -m pip install --no-cache-dir -U \ sagemaker-experiments==0.1.45 +#pinning old version because of protobuf dependency with tensorflow-metadata RUN $PYTHON -m pip install --no-cache-dir -U \ - sagemaker-training + sagemaker-training==4.8.4 RUN $PYTHON -m pip install --no-cache-dir -U \ sagemaker-tensorflow-training==20.4.1 From 7d7a983fa77ac75385b7e5bf71eaba23cb7a8944 Mon Sep 17 00:00:00 2001 From: jkottu Date: Mon, 29 Sep 2025 11:49:46 -0700 Subject: [PATCH 21/49] remove six and lexicon --- src/requirements.txt | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/requirements.txt b/src/requirements.txt index 67cbac39da7c..e4db76390f53 100644 --- a/src/requirements.txt +++ b/src/requirements.txt @@ -10,6 +10,4 @@ ruamel.yaml==0.18.10 black==24.8.0 junit-xml==1.9 toml==0.10.2 -retrying -lexicon -six \ No newline at end of file +retrying \ No newline at end of file From 911289072eb0ef6f6a114ffa8abbd3ab59d7cf93 Mon Sep 17 00:00:00 2001 From: jkottu Date: Mon, 29 Sep 2025 13:05:54 -0700 Subject: [PATCH 22/49] test tf 2.18 sm --- dlc_developer_config.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dlc_developer_config.toml b/dlc_developer_config.toml index b99060beec68..8599ba8329d8 100644 --- a/dlc_developer_config.toml +++ b/dlc_developer_config.toml @@ -123,7 +123,7 @@ dlc-pr-base = "" # Standard Framework Training dlc-pr-pytorch-training = "" -dlc-pr-tensorflow-2-training = "tensorflow/training/buildspec-2-19-sm.yml" +dlc-pr-tensorflow-2-training = "tensorflow/training/buildspec-2-18-sm.yml" dlc-pr-autogluon-training = "" # ARM64 Training From cd72de51e8a96e622091610939a69cb7fcfe4c8c Mon Sep 17 00:00:00 2001 From: jkottu Date: Mon, 29 Sep 2025 13:44:47 -0700 Subject: [PATCH 23/49] add uv changes --- buildspec.yml | 2 +- .../docker/2.18/py3/cu125/Dockerfile.gpu | 56 ++++++++----------- 2 files changed, 23 insertions(+), 35 deletions(-) diff --git a/buildspec.yml b/buildspec.yml index d26cc191c3eb..e203d3a620d5 100644 --- a/buildspec.yml +++ b/buildspec.yml @@ -10,7 +10,7 @@ phases: - start-dockerd - echo Logging in to Amazon ECR... - $(aws ecr get-login --no-include-email --region $AWS_DEFAULT_REGION) - - pip install -r src/requirements.txt + - uv pip install -r src/requirements.txt - bash src/setup.sh $FRAMEWORK - python src/parse_partner_developers.py - python src/send_status.py --status 2 diff --git a/tensorflow/training/docker/2.18/py3/cu125/Dockerfile.gpu b/tensorflow/training/docker/2.18/py3/cu125/Dockerfile.gpu index 1d57bc1710e1..b12a6a4f327f 100644 --- a/tensorflow/training/docker/2.18/py3/cu125/Dockerfile.gpu +++ b/tensorflow/training/docker/2.18/py3/cu125/Dockerfile.gpu @@ -28,7 +28,8 @@ ENV PYTHONUNBUFFERED=1 ENV PYTHONIOENCODING=UTF-8 ENV LANG=C.UTF-8 ENV LC_ALL=C.UTF-8 - +ENV UV_VERSION=0.8.22 +ENV UV_SYSTEM_PYTHON=1 # Set environment variables for MKL # For more about MKL with TensorFlow see: # https://www.tensorflow.org/performance/performance_guide#tensorflow_with_intel%C2%AE_mkl_dnn @@ -199,16 +200,18 @@ RUN wget https://www.python.org/ftp/python/$PYTHON_VERSION/Python-$PYTHON_VERSIO && make install \ && rm -rf ../Python-$PYTHON_VERSION* -RUN ${PIP} --no-cache-dir install --upgrade \ - pip \ - setuptools \ - wheel +RUN curl -LsSf https://astral.sh/uv/install.sh | sh -s -- v${UV_VERSION} \ + && uv --version \ + && ln -s $(which uv) /usr/local/bin/uv \ + && uv pip install --system \ + setuptools \ + wheel # Some TF tools expect a "python" binary RUN ln -s $(which ${PYTHON}) /usr/local/bin/python \ && ln -s $(which ${PIP}) /usr/bin/pip -RUN ${PIP} install --no-cache-dir -U \ +RUN uv pip install --system \ pybind11 \ cmake \ scipy \ @@ -352,21 +355,19 @@ ENV SAGEMAKER_TRAINING_MODULE sagemaker_tensorflow_container.training:main # and this is fine since sagemaker is more important than the models and # the models still work on pyyaml 6 in this context. # Need to install wheel before we can fix the pyyaml issue below -RUN pip install wheel \ - && pip install "cython<3" "pyyaml<6" --no-build-isolation +RUN uv pip install --system \ + "cython<3" "pyyaml<6" --no-build-isolation -# https://github.com/tensorflow/models/issues/9267 -# tf-models does not respect existing installations of TF and always installs open source TF -RUN ${PIP} install --no-cache-dir -U \ +RUN uv pip install --system \ tf-models-official==2.18.0 \ tensorflow-text==2.18.1 \ - && ${PIP} uninstall -y tensorflow tensorflow-gpu \ - && ${PIP} install --no-cache-dir -U \ + && uv pip uninstall -y tensorflow tensorflow-gpu \ + && uv pip install --system \ ${TF_URL} \ "tensorflow-io==0.37.*" \ "tensorflow-datasets==4.9.7" -RUN $PYTHON -m pip install --no-cache-dir -U \ +RUN uv pip install --system \ numba \ bokeh \ imageio \ @@ -376,26 +377,13 @@ RUN $PYTHON -m pip install --no-cache-dir -U \ shap \ numpy -RUN $PYTHON -m pip install --no-cache-dir -U \ - "sagemaker<3" - -RUN $PYTHON -m pip install --no-cache-dir -U \ - sagemaker-experiments==0.1.45 - -#pinning old version because of protobuf dependency with tensorflow-metadata -RUN $PYTHON -m pip install --no-cache-dir -U \ - sagemaker-training==4.8.4 - - RUN $PYTHON -m pip install --no-cache-dir -U \ - sagemaker-tensorflow-training==20.4.1 - -RUN $PYTHON -m pip install --no-cache-dir -U \ - sagemaker-studio-analytics-extension==0.1.4 - -RUN $PYTHON -m pip install --no-cache-dir -U \ - sagemaker-studio-sparkmagic-lib==0.2.0 - -RUN $PYTHON -m pip install --no-cache-dir -U \ +RUN uv pip install --system \ + "sagemaker<3" \ + sagemaker-experiments==0.1.45 \ + sagemaker-training==4.8.4 \ + sagemaker-tensorflow-training==20.4.1 \ + sagemaker-studio-analytics-extension==0.1.4 \ + sagemaker-studio-sparkmagic-lib==0.2.0 \ sparkmagic==0.21.0 \ smclarify From 5c650db13fb01a7dde34cdc1d061df5aaf2c1f15 Mon Sep 17 00:00:00 2001 From: jkottu Date: Mon, 29 Sep 2025 14:39:29 -0700 Subject: [PATCH 24/49] rebuild with uv --- .../training/docker/2.18/py3/cu125/Dockerfile.gpu | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/tensorflow/training/docker/2.18/py3/cu125/Dockerfile.gpu b/tensorflow/training/docker/2.18/py3/cu125/Dockerfile.gpu index b12a6a4f327f..55b1b8dc018e 100644 --- a/tensorflow/training/docker/2.18/py3/cu125/Dockerfile.gpu +++ b/tensorflow/training/docker/2.18/py3/cu125/Dockerfile.gpu @@ -200,9 +200,11 @@ RUN wget https://www.python.org/ftp/python/$PYTHON_VERSION/Python-$PYTHON_VERSIO && make install \ && rm -rf ../Python-$PYTHON_VERSION* -RUN curl -LsSf https://astral.sh/uv/install.sh | sh -s -- v${UV_VERSION} \ - && uv --version \ - && ln -s $(which uv) /usr/local/bin/uv \ +ENV UV_CACHE_DIR=/root/.cache/uv \ + UV_SYSTEM_PYTHON=1 + +RUN curl -LsSf https://astral.sh/uv/install.sh | sh \ + && ln -s /root/.local/bin/uv /usr/local/bin/uv \ && uv pip install --system \ setuptools \ wheel @@ -380,7 +382,9 @@ RUN uv pip install --system \ RUN uv pip install --system \ "sagemaker<3" \ sagemaker-experiments==0.1.45 \ - sagemaker-training==4.8.4 \ + sagemaker-training==4.8.4 + +RUN uv pip install --system \ sagemaker-tensorflow-training==20.4.1 \ sagemaker-studio-analytics-extension==0.1.4 \ sagemaker-studio-sparkmagic-lib==0.2.0 \ From 0e85efc6dcdc6cfceb6f019f887b50d4697f9a49 Mon Sep 17 00:00:00 2001 From: jkottu Date: Mon, 29 Sep 2025 14:57:12 -0700 Subject: [PATCH 25/49] install tf-datasets again with uv --- tensorflow/training/docker/2.18/py3/cu125/Dockerfile.gpu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/training/docker/2.18/py3/cu125/Dockerfile.gpu b/tensorflow/training/docker/2.18/py3/cu125/Dockerfile.gpu index 55b1b8dc018e..c2ce0dc38e9c 100644 --- a/tensorflow/training/docker/2.18/py3/cu125/Dockerfile.gpu +++ b/tensorflow/training/docker/2.18/py3/cu125/Dockerfile.gpu @@ -363,7 +363,7 @@ RUN uv pip install --system \ RUN uv pip install --system \ tf-models-official==2.18.0 \ tensorflow-text==2.18.1 \ - && uv pip uninstall -y tensorflow tensorflow-gpu \ + && uv pip uninstall tensorflow tensorflow-gpu --no-confirm \ && uv pip install --system \ ${TF_URL} \ "tensorflow-io==0.37.*" \ From a5ceb18be5c4c20a4e817084a26838fceeaaba15 Mon Sep 17 00:00:00 2001 From: jkottu Date: Mon, 29 Sep 2025 15:42:26 -0700 Subject: [PATCH 26/49] test pytorch with uv --- dlc_developer_config.toml | 6 +- .../docker/2.8/py3/cu129/Dockerfile.gpu | 25 +++++--- .../docker/2.18/py3/cu125/Dockerfile.gpu | 59 +++++++++++-------- 3 files changed, 53 insertions(+), 37 deletions(-) diff --git a/dlc_developer_config.toml b/dlc_developer_config.toml index 8599ba8329d8..ccd2abc671e6 100644 --- a/dlc_developer_config.toml +++ b/dlc_developer_config.toml @@ -37,7 +37,7 @@ deep_canary_mode = false [build] # Add in frameworks you would like to build. By default, builds are disabled unless you specify building an image. # available frameworks - ["base", "vllm", "autogluon", "huggingface_tensorflow", "huggingface_pytorch", "huggingface_tensorflow_trcomp", "huggingface_pytorch_trcomp", "pytorch_trcomp", "tensorflow", "pytorch", "stabilityai_pytorch"] -build_frameworks = ["tensorflow"] +build_frameworks = ["pytorch"] # By default we build both training and inference containers. Set true/false values to determine which to build. @@ -122,8 +122,8 @@ use_scheduler = false dlc-pr-base = "" # Standard Framework Training -dlc-pr-pytorch-training = "" -dlc-pr-tensorflow-2-training = "tensorflow/training/buildspec-2-18-sm.yml" +dlc-pr-pytorch-training = "pytorch/training/buildspec-2-8-sm.yml" +dlc-pr-tensorflow-2-training = "" dlc-pr-autogluon-training = "" # ARM64 Training diff --git a/pytorch/training/docker/2.8/py3/cu129/Dockerfile.gpu b/pytorch/training/docker/2.8/py3/cu129/Dockerfile.gpu index 37a0417ceded..d0fd329e9280 100644 --- a/pytorch/training/docker/2.8/py3/cu129/Dockerfile.gpu +++ b/pytorch/training/docker/2.8/py3/cu129/Dockerfile.gpu @@ -53,6 +53,8 @@ ENV PYTHONUNBUFFERED=1 ENV PYTHONIOENCODING=UTF-8 ENV LANG=C.UTF-8 ENV LC_ALL=C.UTF-8 +ENV UV_VERSION=0.8.22 +ENV UV_SYSTEM_PYTHON=1 ENV TORCH_NVCC_FLAGS="-Xfatbin -compress-all" @@ -80,8 +82,15 @@ ENV LD_LIBRARY_PATH="/usr/local/lib:/opt/amazon/ofi-nccl/lib/x86_64-linux-gnu:/o # Python Path ENV PATH="/usr/local/bin:${PATH}" + +ENV UV_CACHE_DIR=/root/.cache/uv \ + UV_SYSTEM_PYTHON=1 + +RUN curl -LsSf https://astral.sh/uv/install.sh | sh \ + && ln -s /root/.local/bin/uv /usr/local/bin/uv + # Install common conda packages -RUN pip install --no-cache-dir \ +RUN uv pip install --no-cache-dir \ cython \ cryptography \ pyOpenSSL \ @@ -114,11 +123,11 @@ RUN pip install --no-cache-dir \ tornado>=6.5.1 # Install PyTorch -RUN pip install --no-cache-dir -U torch==${PYTORCH_VERSION} \ +RUN uv pip install --no-cache-dir -U torch==${PYTORCH_VERSION} \ torchvision==${TORCHVISION_VERSION} \ torchaudio==${TORCHAUDIO_VERSION} \ --index-url https://download.pytorch.org/whl/cu129 \ - && pip install --no-cache-dir -U torchtnt==${TORCHTNT_VERSION} \ + && uv pip install --no-cache-dir -U torchtnt==${TORCHTNT_VERSION} \ torchdata==${TORCHDATA_VERSION} \ triton \ s3torchconnector \ @@ -131,18 +140,18 @@ RUN pip install --no-cache-dir -U torch==${PYTORCH_VERSION} \ thinc==8.3.4 \ blis \ numpy \ - && pip uninstall -y dataclasses + && uv pip uninstall dataclasses # Install flash attn and NVIDIA transformer engine. # Optionally set NVTE_FRAMEWORK to avoid bringing in additional frameworks during TE install ENV NVTE_FRAMEWORK=pytorch RUN curl -LO https://github.com/Dao-AILab/flash-attention/releases/download/v${FLASH_ATTN_VERSION}/flash_attn-${FLASH_ATTN_VERSION}+cu12torch2.8cxx11abiTRUE-cp312-cp312-linux_x86_64.whl \ - && pip install flash_attn-${FLASH_ATTN_VERSION}+cu12torch2.8cxx11abiTRUE-cp312-cp312-linux_x86_64.whl --no-build-isolation \ + && uv pip install flash_attn-${FLASH_ATTN_VERSION}+cu12torch2.8cxx11abiTRUE-cp312-cp312-linux_x86_64.whl --no-build-isolation \ && rm flash_attn-${FLASH_ATTN_VERSION}+cu12torch2.8cxx11abiTRUE-cp312-cp312-linux_x86_64.whl # Install TE using instructions from https://docs.nvidia.com/deeplearning/transformer-engine/user-guide/installation.html -RUN pip install --no-cache-dir git+https://github.com/NVIDIA/TransformerEngine.git@release_v${TE_VERSION} --no-build-isolation +RUN uv pip install --no-cache-dir git+https://github.com/NVIDIA/TransformerEngine.git@release_v${TE_VERSION} --no-build-isolation RUN curl -o /license.txt https://aws-dlc-licenses.s3.amazonaws.com/pytorch-2.8/license.txt @@ -245,7 +254,7 @@ ARG PYTHON WORKDIR / # Install SM packages -RUN pip install --no-cache-dir -U \ +RUN uv pip install --no-cache-dir -U \ smclarify \ "sagemaker>=2" \ sagemaker-experiments \ @@ -253,7 +262,7 @@ RUN pip install --no-cache-dir -U \ sagemaker-training # Install extra packages -RUN pip install --no-cache-dir -U \ +RUN uv pip install --no-cache-dir -U \ bokeh \ imageio \ numba \ diff --git a/tensorflow/training/docker/2.18/py3/cu125/Dockerfile.gpu b/tensorflow/training/docker/2.18/py3/cu125/Dockerfile.gpu index c2ce0dc38e9c..c8b17f2ca0e9 100644 --- a/tensorflow/training/docker/2.18/py3/cu125/Dockerfile.gpu +++ b/tensorflow/training/docker/2.18/py3/cu125/Dockerfile.gpu @@ -28,8 +28,7 @@ ENV PYTHONUNBUFFERED=1 ENV PYTHONIOENCODING=UTF-8 ENV LANG=C.UTF-8 ENV LC_ALL=C.UTF-8 -ENV UV_VERSION=0.8.22 -ENV UV_SYSTEM_PYTHON=1 + # Set environment variables for MKL # For more about MKL with TensorFlow see: # https://www.tensorflow.org/performance/performance_guide#tensorflow_with_intel%C2%AE_mkl_dnn @@ -200,20 +199,16 @@ RUN wget https://www.python.org/ftp/python/$PYTHON_VERSION/Python-$PYTHON_VERSIO && make install \ && rm -rf ../Python-$PYTHON_VERSION* -ENV UV_CACHE_DIR=/root/.cache/uv \ - UV_SYSTEM_PYTHON=1 - -RUN curl -LsSf https://astral.sh/uv/install.sh | sh \ - && ln -s /root/.local/bin/uv /usr/local/bin/uv \ - && uv pip install --system \ - setuptools \ - wheel +RUN ${PIP} --no-cache-dir install --upgrade \ + pip \ + setuptools \ + wheel # Some TF tools expect a "python" binary RUN ln -s $(which ${PYTHON}) /usr/local/bin/python \ && ln -s $(which ${PIP}) /usr/bin/pip -RUN uv pip install --system \ +RUN ${PIP} install --no-cache-dir -U \ pybind11 \ cmake \ scipy \ @@ -357,20 +352,22 @@ ENV SAGEMAKER_TRAINING_MODULE sagemaker_tensorflow_container.training:main # and this is fine since sagemaker is more important than the models and # the models still work on pyyaml 6 in this context. # Need to install wheel before we can fix the pyyaml issue below -RUN uv pip install --system \ - "cython<3" "pyyaml<6" --no-build-isolation +RUN pip install wheel \ + && pip install "cython<3" "pyyaml<6" --no-build-isolation -RUN uv pip install --system \ +# https://github.com/tensorflow/models/issues/9267 +# tf-models does not respect existing installations of TF and always installs open source TF +RUN ${PIP} install --no-cache-dir -U \ tf-models-official==2.18.0 \ tensorflow-text==2.18.1 \ - && uv pip uninstall tensorflow tensorflow-gpu --no-confirm \ - && uv pip install --system \ + && ${PIP} uninstall -y tensorflow tensorflow-gpu \ + && ${PIP} install --no-cache-dir -U \ ${TF_URL} \ "tensorflow-io==0.37.*" \ "tensorflow-datasets==4.9.7" -RUN uv pip install --system \ - numba \ +RUN $PYTHON -m pip install --no-cache-dir -U \ + numba==0.61.0 \ bokeh \ imageio \ opencv-python \ @@ -379,15 +376,25 @@ RUN uv pip install --system \ shap \ numpy -RUN uv pip install --system \ - "sagemaker<3" \ - sagemaker-experiments==0.1.45 \ - sagemaker-training==4.8.4 +RUN $PYTHON -m pip install --no-cache-dir -U \ + "sagemaker<3" + +RUN $PYTHON -m pip install --no-cache-dir -U \ + sagemaker-experiments==0.1.45 + +RUN $PYTHON -m pip install --no-cache-dir -U \ + sagemaker-training==4.8.4 + + RUN $PYTHON -m pip install --no-cache-dir -U \ + sagemaker-tensorflow-training==20.4.1 + +RUN $PYTHON -m pip install --no-cache-dir -U \ + sagemaker-studio-analytics-extension==0.1.4 + +RUN $PYTHON -m pip install --no-cache-dir -U \ + sagemaker-studio-sparkmagic-lib==0.2.0 -RUN uv pip install --system \ - sagemaker-tensorflow-training==20.4.1 \ - sagemaker-studio-analytics-extension==0.1.4 \ - sagemaker-studio-sparkmagic-lib==0.2.0 \ +RUN $PYTHON -m pip install --no-cache-dir -U \ sparkmagic==0.21.0 \ smclarify From 18436286547059336623a8c87df906e4d606be07 Mon Sep 17 00:00:00 2001 From: jkottu Date: Mon, 29 Sep 2025 15:44:40 -0700 Subject: [PATCH 27/49] test pytorch without uv in docker image --- dlc_developer_config.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dlc_developer_config.toml b/dlc_developer_config.toml index ccd2abc671e6..ab12df19dfb6 100644 --- a/dlc_developer_config.toml +++ b/dlc_developer_config.toml @@ -122,7 +122,7 @@ use_scheduler = false dlc-pr-base = "" # Standard Framework Training -dlc-pr-pytorch-training = "pytorch/training/buildspec-2-8-sm.yml" +dlc-pr-pytorch-training = "pytorch/training/buildspec-2-7-ec2.yml" dlc-pr-tensorflow-2-training = "" dlc-pr-autogluon-training = "" From 9b42768b801a22908ad66fd505c1cf34c1c64128 Mon Sep 17 00:00:00 2001 From: jkottu Date: Mon, 29 Sep 2025 16:34:46 -0700 Subject: [PATCH 28/49] remove uv changes --- buildspec.yml | 2 +- .../docker/2.8/py3/cu129/Dockerfile.gpu | 25 ++++++------------- .../training/docker/2.18/py3/Dockerfile.cpu | 5 ++-- 3 files changed, 12 insertions(+), 20 deletions(-) diff --git a/buildspec.yml b/buildspec.yml index e203d3a620d5..d26cc191c3eb 100644 --- a/buildspec.yml +++ b/buildspec.yml @@ -10,7 +10,7 @@ phases: - start-dockerd - echo Logging in to Amazon ECR... - $(aws ecr get-login --no-include-email --region $AWS_DEFAULT_REGION) - - uv pip install -r src/requirements.txt + - pip install -r src/requirements.txt - bash src/setup.sh $FRAMEWORK - python src/parse_partner_developers.py - python src/send_status.py --status 2 diff --git a/pytorch/training/docker/2.8/py3/cu129/Dockerfile.gpu b/pytorch/training/docker/2.8/py3/cu129/Dockerfile.gpu index d0fd329e9280..37a0417ceded 100644 --- a/pytorch/training/docker/2.8/py3/cu129/Dockerfile.gpu +++ b/pytorch/training/docker/2.8/py3/cu129/Dockerfile.gpu @@ -53,8 +53,6 @@ ENV PYTHONUNBUFFERED=1 ENV PYTHONIOENCODING=UTF-8 ENV LANG=C.UTF-8 ENV LC_ALL=C.UTF-8 -ENV UV_VERSION=0.8.22 -ENV UV_SYSTEM_PYTHON=1 ENV TORCH_NVCC_FLAGS="-Xfatbin -compress-all" @@ -82,15 +80,8 @@ ENV LD_LIBRARY_PATH="/usr/local/lib:/opt/amazon/ofi-nccl/lib/x86_64-linux-gnu:/o # Python Path ENV PATH="/usr/local/bin:${PATH}" - -ENV UV_CACHE_DIR=/root/.cache/uv \ - UV_SYSTEM_PYTHON=1 - -RUN curl -LsSf https://astral.sh/uv/install.sh | sh \ - && ln -s /root/.local/bin/uv /usr/local/bin/uv - # Install common conda packages -RUN uv pip install --no-cache-dir \ +RUN pip install --no-cache-dir \ cython \ cryptography \ pyOpenSSL \ @@ -123,11 +114,11 @@ RUN uv pip install --no-cache-dir \ tornado>=6.5.1 # Install PyTorch -RUN uv pip install --no-cache-dir -U torch==${PYTORCH_VERSION} \ +RUN pip install --no-cache-dir -U torch==${PYTORCH_VERSION} \ torchvision==${TORCHVISION_VERSION} \ torchaudio==${TORCHAUDIO_VERSION} \ --index-url https://download.pytorch.org/whl/cu129 \ - && uv pip install --no-cache-dir -U torchtnt==${TORCHTNT_VERSION} \ + && pip install --no-cache-dir -U torchtnt==${TORCHTNT_VERSION} \ torchdata==${TORCHDATA_VERSION} \ triton \ s3torchconnector \ @@ -140,18 +131,18 @@ RUN uv pip install --no-cache-dir -U torch==${PYTORCH_VERSION} \ thinc==8.3.4 \ blis \ numpy \ - && uv pip uninstall dataclasses + && pip uninstall -y dataclasses # Install flash attn and NVIDIA transformer engine. # Optionally set NVTE_FRAMEWORK to avoid bringing in additional frameworks during TE install ENV NVTE_FRAMEWORK=pytorch RUN curl -LO https://github.com/Dao-AILab/flash-attention/releases/download/v${FLASH_ATTN_VERSION}/flash_attn-${FLASH_ATTN_VERSION}+cu12torch2.8cxx11abiTRUE-cp312-cp312-linux_x86_64.whl \ - && uv pip install flash_attn-${FLASH_ATTN_VERSION}+cu12torch2.8cxx11abiTRUE-cp312-cp312-linux_x86_64.whl --no-build-isolation \ + && pip install flash_attn-${FLASH_ATTN_VERSION}+cu12torch2.8cxx11abiTRUE-cp312-cp312-linux_x86_64.whl --no-build-isolation \ && rm flash_attn-${FLASH_ATTN_VERSION}+cu12torch2.8cxx11abiTRUE-cp312-cp312-linux_x86_64.whl # Install TE using instructions from https://docs.nvidia.com/deeplearning/transformer-engine/user-guide/installation.html -RUN uv pip install --no-cache-dir git+https://github.com/NVIDIA/TransformerEngine.git@release_v${TE_VERSION} --no-build-isolation +RUN pip install --no-cache-dir git+https://github.com/NVIDIA/TransformerEngine.git@release_v${TE_VERSION} --no-build-isolation RUN curl -o /license.txt https://aws-dlc-licenses.s3.amazonaws.com/pytorch-2.8/license.txt @@ -254,7 +245,7 @@ ARG PYTHON WORKDIR / # Install SM packages -RUN uv pip install --no-cache-dir -U \ +RUN pip install --no-cache-dir -U \ smclarify \ "sagemaker>=2" \ sagemaker-experiments \ @@ -262,7 +253,7 @@ RUN uv pip install --no-cache-dir -U \ sagemaker-training # Install extra packages -RUN uv pip install --no-cache-dir -U \ +RUN pip install --no-cache-dir -U \ bokeh \ imageio \ numba \ diff --git a/tensorflow/training/docker/2.18/py3/Dockerfile.cpu b/tensorflow/training/docker/2.18/py3/Dockerfile.cpu index 8dfdeb1f9a3e..04a7eae8d4cc 100644 --- a/tensorflow/training/docker/2.18/py3/Dockerfile.cpu +++ b/tensorflow/training/docker/2.18/py3/Dockerfile.cpu @@ -264,7 +264,7 @@ RUN ${PIP} install --no-cache-dir -U \ "tensorflow-datasets==4.9.7" RUN $PYTHON -m pip install --no-cache-dir -U \ - numba \ + numba==0.61.0 \ bokeh \ imageio \ opencv-python \ @@ -279,8 +279,9 @@ RUN $PYTHON -m pip install --no-cache-dir -U \ RUN $PYTHON -m pip install --no-cache-dir -U \ sagemaker-experiments==0.1.45 +#pinning old version because of protobuf dependency with tensorflow-metadata RUN $PYTHON -m pip install --no-cache-dir -U \ - sagemaker-training + sagemaker-training==4.8.4 RUN $PYTHON -m pip install --no-cache-dir -U \ sagemaker-tensorflow-training==20.4.1 From 20ee49e5f2d372781b155a8ae0bfcca62111ecdf Mon Sep 17 00:00:00 2001 From: Ahsan Khan Date: Thu, 18 Sep 2025 10:38:27 -0700 Subject: [PATCH 29/49] update neuron available images (#5281) --- available_images.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/available_images.md b/available_images.md index 010827837808..bc22fb26fabd 100644 --- a/available_images.md +++ b/available_images.md @@ -368,6 +368,8 @@ Note: Starting from Neuron SDK 2.17.0, Dockerfiles for PyTorch Neuron Containers |----------------------------------------------------------------------------------------------------------------------------------------------------|----------------------------------------------------------|--------------------|-----------|------------------------------|------------------------|----------------------------------------------------------------------------------------------------------------------| | [PyTorch 2.7.0](https://github.com/aws-neuron/deep-learning-containers/blob/2.25.0/docker/pytorch/inference/2.7.0/Dockerfile.neuronx) | torch-neuronx, transformers-neuronx, neuronx_distributed, neuronx_distributed_inference | Neuron 2.25.0 | inference | trn1,trn2,inf2 | 3.10 (py310) | 763104351884.dkr.ecr.us-west-2.amazonaws.com/pytorch-inference-neuronx:2.7.0-neuronx-py310-sdk2.25.0-ubuntu22.04 | | [PyTorch 2.7.0](https://github.com/aws-neuron/deep-learning-containers/blob/2.25.0/docker/pytorch/training/2.7.0/Dockerfile.neuronx) | torch-neuronx, transformers-neuronx, neuronx_distributed, neuronx_distributed_training | Neuron 2.25.0 | training | trn1,trn2,inf2 | 3.10 (py310) | 763104351884.dkr.ecr.us-west-2.amazonaws.com/pytorch-training-neuronx:2.7.0-neuronx-py310-sdk2.25.0-ubuntu22.04 | +| [PyTorch 2.7.0](https://github.com/aws-neuron/deep-learning-containers/blob/2.24.1/docker/pytorch/inference/2.7.0/Dockerfile.neuronx) | torch-neuronx, transformers-neuronx, neuronx_distributed, neuronx_distributed_inference | Neuron 2.24.1 | inference | trn1,trn2,inf2 | 3.10 (py310) | 763104351884.dkr.ecr.us-west-2.amazonaws.com/pytorch-inference-neuronx:2.7.0-neuronx-py310-sdk2.24.1-ubuntu22.04 | +| [PyTorch 2.7.0](https://github.com/aws-neuron/deep-learning-containers/blob/2.24.1/docker/pytorch/training/2.7.0/Dockerfile.neuronx) | torch-neuronx, transformers-neuronx, neuronx_distributed, neuronx_distributed_training | Neuron 2.24.1 | training | trn1,trn2,inf2 | 3.10 (py310) | 763104351884.dkr.ecr.us-west-2.amazonaws.com/pytorch-training-neuronx:2.7.0-neuronx-py310-sdk2.24.1-ubuntu22.04 | | [PyTorch 2.6.0](https://github.com/aws-neuron/deep-learning-containers/blob/2.23.0/docker/pytorch/inference/2.6.0/Dockerfile.neuronx) | torch-neuronx, transformers-neuronx, neuronx_distributed, neuronx_distributed_inference | Neuron 2.23.0 | inference | trn1,trn2,inf2 | 3.10 (py310) | 763104351884.dkr.ecr.us-west-2.amazonaws.com/pytorch-inference-neuronx:2.6.0-neuronx-py310-sdk2.23.0-ubuntu22.04 | | [PyTorch 2.6.0](https://github.com/aws-neuron/deep-learning-containers/blob/2.23.0/docker/pytorch/training/2.6.0/Dockerfile.neuronx) | torch-neuronx, transformers-neuronx, neuronx_distributed, neuronx_distributed_training | Neuron 2.23.0 | training | trn1,trn2,inf2 | 3.10 (py310) | 763104351884.dkr.ecr.us-west-2.amazonaws.com/pytorch-training-neuronx:2.6.0-neuronx-py310-sdk2.23.0-ubuntu22.04 | | [PyTorch 2.5.1](https://github.com/aws-neuron/deep-learning-containers/blob/2.22.0/docker/pytorch/inference/2.5.1/Dockerfile.neuronx) | torch-neuronx, transformers-neuronx, neuronx_distributed, neuronx_distributed_inference | Neuron 2.22.0 | inference | trn1,trn2,inf2 | 3.10 (py310) | 763104351884.dkr.ecr.us-west-2.amazonaws.com/pytorch-inference-neuronx:2.5.1-neuronx-py310-sdk2.22.0-ubuntu22.04 | From 71df1f178108139a525e2655c8a521d7cba0c193 Mon Sep 17 00:00:00 2001 From: Jon Lee <42048018+j-w-l@users.noreply.github.com> Date: Thu, 18 Sep 2025 19:10:27 +0000 Subject: [PATCH 30/49] patch: linux-libc-dev vulnerability in huggingface-pytorch-training-neuronx (#5278) * Update ['dlc_developer_config.toml'] dlc_developer_config.toml: { 'build': { 'build_frameworks': ['huggingface_pytorch'], 'build_inference': False, 'build_training': True}, 'buildspec_override': { 'dlc-pr-huggingface-pytorch-neuronx-training': 'huggingface/pytorch/training/buildspec-neuronx.yml'}, 'dev': { 'arm64_mode': False, 'deep_canary_mode': False, 'graviton_mode': False, 'neuronx_mode': True}, 'test': { 'ec2_tests': False, 'ecs_tests': False, 'eks_tests': False, 'sagemaker_local_tests': False, 'sagemaker_remote_tests': False, 'sanity_tests': True, 'security_tests': False}} * patch: linux-libc-dev vulnerability in huggingface-pytorch-training-neuronx:2.1.2-transformers4.48.1-neuronx-py310-sdk2.20.0-ubuntu20.04 * Restore ['dlc_developer_config.toml'] dlc_developer_config.toml: ('Restore to ' 'https://raw.githubusercontent.com/aws/deep-learning-containers/master/dlc_developer_config.toml') --------- Co-authored-by: Jonathan Lee Co-authored-by: varunmoris <176621270+varunmoris@users.noreply.github.com> --- .../Dockerfile.neuronx.os_scan_allowlist.json | 29 +++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/huggingface/pytorch/training/docker/2.1/py3/sdk2.20.0/Dockerfile.neuronx.os_scan_allowlist.json b/huggingface/pytorch/training/docker/2.1/py3/sdk2.20.0/Dockerfile.neuronx.os_scan_allowlist.json index 5ccb163f9a91..fc8979f0df79 100644 --- a/huggingface/pytorch/training/docker/2.1/py3/sdk2.20.0/Dockerfile.neuronx.os_scan_allowlist.json +++ b/huggingface/pytorch/training/docker/2.1/py3/sdk2.20.0/Dockerfile.neuronx.os_scan_allowlist.json @@ -845,6 +845,35 @@ } ], "linux-libc-dev": [ + { + "description": "In the Linux kernel, the following vulnerability has been resolved: of: module: add buffer overflow check in of_modalias(). In of_modalias(), if the buffer happens to be too small even for the 1st snprintf() call, the len parameter will become negative and str parameter (if not NULL initially) will point beyond the buffer's end. Add the buffer overflow check after the 1st snprintf() call and fix such check after the strlen() call (accounting for the terminating NUL char).", + "vulnerability_id": "CVE-2024-38541", + "name": "CVE-2024-38541", + "package_name": "linux-libc-dev", + "package_details": { + "file_path": null, + "name": "linux-libc-dev", + "package_manager": "OS", + "version": "5.4.0", + "release": "192.212" + }, + "remediation": { + "recommendation": { + "text": "None Provided" + } + }, + "cvss_v3_score": 7.8, + "cvss_v30_score": 0.0, + "cvss_v31_score": 7.8, + "cvss_v2_score": 0.0, + "cvss_v3_severity": "HIGH", + "source_url": "https://ubuntu.com/security/CVE-2024-38541", + "source": "UBUNTU_CVE", + "severity": "CRITICAL", + "status": "ACTIVE", + "title": "CVE-2024-38541 - linux-libc-dev", + "reason_to_ignore": "N/A" + }, { "description":"In the Linux kernel, the following vulnerability has been resolved: greybus: Fix use-after-free bug in gb_interface_release due to race condition. In gb_interface_create, &intf->mode_switch_completion is bound with gb_interface_mode_switch_work. Then it will be started by gb_interface_request_mode_switch. Here is the relevant code. if (!queue_work(system_long_wq, &intf->mode_switch_work)) { ... } If we call gb_interface_release to make cleanup, there may be an unfinished work. This function will call kfree to free the object \"intf\". However, if gb_interface_mode_switch_work is scheduled to run after kfree, it may cause use-after-free error as gb_interface_mode_switch_work will use the object \"intf\". The possible execution flow that may lead to the issue is as follows: CPU0 CPU1 | gb_interface_create | gb_interface_request_mode_switch gb_interface_release | kfree(intf) (free) | | gb_interface_mode_switch_work | mutex_lock(&intf->mutex) (use) Fix it by canceling the work before kfree.", "vulnerability_id":"CVE-2024-39495", From 8b0366e0b470faa1268f33ae323b3af3732cc46c Mon Sep 17 00:00:00 2001 From: Jyothirmai Kottu Date: Thu, 18 Sep 2025 13:01:29 -0700 Subject: [PATCH 31/49] Release yaml changes - vLLM (#5282) * update 0.10.2 * update 0.10.2 --- release_images_general.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/release_images_general.yml b/release_images_general.yml index 1cbf78193521..b1edb5296904 100644 --- a/release_images_general.yml +++ b/release_images_general.yml @@ -44,14 +44,14 @@ release_images: public_registry: True 4: framework: "vllm" - version: "0.10.1" + version: "0.10.2" arch_type: "x86" customer_type: "ec2" general: device_types: [ "gpu" ] python_versions: [ "py312" ] os_version: "ubuntu22.04" - cuda_version: "cu128" + cuda_version: "cu129" example: False disable_sm_tag: False force_release: False @@ -69,4 +69,4 @@ release_images: example: False disable_sm_tag: False force_release: False - public_registry: False + public_registry: True From bb78f9be6ce631c281b7528c2d1a488b8d3fed86 Mon Sep 17 00:00:00 2001 From: Jyothirmai Kottu Date: Fri, 19 Sep 2025 09:56:08 -0700 Subject: [PATCH 32/49] Update Changelog vLLM (#5286) * changelog update * update changelog to 0.10.2 --- vllm/CHANGELOG.md | 38 ++++++++++++++++++++++++++------------ 1 file changed, 26 insertions(+), 12 deletions(-) diff --git a/vllm/CHANGELOG.md b/vllm/CHANGELOG.md index 356e592cd4c5..281558999808 100644 --- a/vllm/CHANGELOG.md +++ b/vllm/CHANGELOG.md @@ -2,14 +2,28 @@ All notable changes to vLLM Deep Learning Containers will be documented in this file. +## [0.10.2] - 2025-09-18 +### Updated +- vllm/vllm-openai version `v0.10.2`, see [release note](https://github.com/vllm-project/vllm/releases/tag/v0.10.2) for details. + +### Added +- Introducing vLLM ARM64 support for AWS Graviton (g5g) with NVIDIA T4 GPUs, using XFormers/FlashInfer as attention backend and V0 engine for Turing architecture compatibility - [release tag](https://github.com/aws/deep-learning-containers/releases/tag/v1.1-vllm-arm64-ec2-0.10.2-gpu-py312) + +### Sample ECR URI +``` +763104351884.dkr.ecr.us-west-2.amazonaws.com/vllm-arm64:0.10.2-gpu-py312-cu129-ubuntu22.04-ec2-v1.1 +763104351884.dkr.ecr.us-west-2.amazonaws.com/vllm:0.10.2-gpu-py312-cu129-ubuntu22.04-ec2-v1.0 +763104351884.dkr.ecr.us-east-1.amazonaws.com/vllm:0.10.2-gpu-py312-cu129-ubuntu22.04-ec2 +``` + ## [0.10.1] - 2025-08-25 ### Updated - vllm/vllm-openai version `v0.10.1.1`, see [release note](https://github.com/vllm-project/vllm/releases/tag/v0.10.1.1) for details. - EFA installer version `1.43.2` ### Sample ECR URI ``` -763104351884.dkr.ecr.us-east-1.amazonaws.com/0.10-gpu-py312-ec2 -763104351884.dkr.ecr.us-east-1.amazonaws.com/0.10.1-gpu-py312-cu128-ubuntu22.04-ec2 +763104351884.dkr.ecr.us-east-1.amazonaws.com/vllm:0.10-gpu-py312-ec2 +763104351884.dkr.ecr.us-east-1.amazonaws.com/vllm:0.10.1-gpu-py312-cu128-ubuntu22.04-ec2 ``` ## [0.10.0] - 2025-08-04 @@ -18,8 +32,8 @@ All notable changes to vLLM Deep Learning Containers will be documented in this - EFA installer version `1.43.1` ### Sample ECR URI ``` -763104351884.dkr.ecr.us-east-1.amazonaws.com/0.10-gpu-py312-ec2 -763104351884.dkr.ecr.us-east-1.amazonaws.com/0.10.0-gpu-py312-cu128-ubuntu22.04-ec2 +763104351884.dkr.ecr.us-east-1.amazonaws.com/vllm:0.10-gpu-py312-ec2 +763104351884.dkr.ecr.us-east-1.amazonaws.com/vllm:0.10.0-gpu-py312-cu128-ubuntu22.04-ec2 ``` ## [0.9.2] - 2025-07-15 @@ -27,8 +41,8 @@ All notable changes to vLLM Deep Learning Containers will be documented in this - vllm/vllm-openai version `v0.9.2`, see [release note](https://github.com/vllm-project/vllm/releases/tag/v0.9.2) for details. ### Sample ECR URI ``` -763104351884.dkr.ecr.us-east-1.amazonaws.com/0.9-gpu-py312-ec2 -763104351884.dkr.ecr.us-east-1.amazonaws.com/0.9.2-gpu-py312-cu128-ubuntu22.04-ec2 +763104351884.dkr.ecr.us-east-1.amazonaws.com/vllm:0.9-gpu-py312-ec2 +763104351884.dkr.ecr.us-east-1.amazonaws.com/vllm:0.9.2-gpu-py312-cu128-ubuntu22.04-ec2 ``` ## [0.9.1] - 2025-06-13 @@ -37,8 +51,8 @@ All notable changes to vLLM Deep Learning Containers will be documented in this - EFA installer version `1.42.0` ### Sample ECR URI ``` -763104351884.dkr.ecr.us-east-1.amazonaws.com/0.9-gpu-py312-ec2 -763104351884.dkr.ecr.us-east-1.amazonaws.com/0.9.1-gpu-py312-cu128-ubuntu22.04-ec2 +763104351884.dkr.ecr.us-east-1.amazonaws.com/vllm:0.9-gpu-py312-ec2 +763104351884.dkr.ecr.us-east-1.amazonaws.com/vllm:0.9.1-gpu-py312-cu128-ubuntu22.04-ec2 ``` @@ -48,8 +62,8 @@ All notable changes to vLLM Deep Learning Containers will be documented in this - EFA installer version `1.41.0` ### Sample ECR URI ``` -763104351884.dkr.ecr.us-east-1.amazonaws.com/0.9-gpu-py312-ec2 -763104351884.dkr.ecr.us-east-1.amazonaws.com/0.9.0-gpu-py312-cu128-ubuntu22.04-ec2 +763104351884.dkr.ecr.us-east-1.amazonaws.com/vllm:0.9-gpu-py312-ec2 +763104351884.dkr.ecr.us-east-1.amazonaws.com/vllm:0.9.0-gpu-py312-cu128-ubuntu22.04-ec2 ``` ## [0.8.5] - 2025-06-02 @@ -59,6 +73,6 @@ All notable changes to vLLM Deep Learning Containers will be documented in this - EFA installer version `1.40.0` ### Sample ECR URI ``` -763104351884.dkr.ecr.us-east-1.amazonaws.com/0.8-gpu-py312-ec2 -763104351884.dkr.ecr.us-east-1.amazonaws.com/0.8.5-gpu-py312-cu128-ubuntu22.04-ec2 +763104351884.dkr.ecr.us-east-1.amazonaws.com/vllm:0.8-gpu-py312-ec2 +763104351884.dkr.ecr.us-east-1.amazonaws.com/vllm:0.8.5-gpu-py312-cu128-ubuntu22.04-ec2 ``` \ No newline at end of file From 14cde701158e95967c1f22bbe39f93af3a704bb3 Mon Sep 17 00:00:00 2001 From: Mounik Chinthapanti <106834051+mounchin@users.noreply.github.com> Date: Fri, 19 Sep 2025 11:01:25 -0700 Subject: [PATCH 33/49] Add Neuronx SDK 2.26.0 images to release_images.md (#5287) * Add Neuronx SDK 2.26.0 images to release_images.md * Remove transformers --- available_images.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/available_images.md b/available_images.md index bc22fb26fabd..8f70be99e3b3 100644 --- a/available_images.md +++ b/available_images.md @@ -366,6 +366,8 @@ Note: Starting from Neuron SDK 2.17.0, Dockerfiles for PyTorch Neuron Containers | Framework | Neuron Package | Neuron SDK Version | Job Type | Supported EC2 Instance Types | Python Version Options | Example URL | |----------------------------------------------------------------------------------------------------------------------------------------------------|----------------------------------------------------------|--------------------|-----------|------------------------------|------------------------|----------------------------------------------------------------------------------------------------------------------| +| [PyTorch 2.8.0](https://github.com/aws-neuron/deep-learning-containers/blob/2.26.0/docker/pytorch/inference/2.8.0/Dockerfile.neuronx) | torch-neuronx, neuronx_distributed, neuronx_distributed_inference | Neuron 2.26.0 | inference | trn1,trn2,inf2 | 3.11 (py311) | 763104351884.dkr.ecr.us-west-2.amazonaws.com/pytorch-inference-neuronx:2.8.0-neuronx-py311-sdk2.26.0-ubuntu22.04 | +| [PyTorch 2.8.0](https://github.com/aws-neuron/deep-learning-containers/blob/2.26.0/docker/pytorch/training/2.8.0/Dockerfile.neuronx) | torch-neuronx, neuronx_distributed, neuronx_distributed_training | Neuron 2.26.0 | training | trn1,trn2,inf2 | 3.11 (py311) | 763104351884.dkr.ecr.us-west-2.amazonaws.com/pytorch-training-neuronx:2.8.0-neuronx-py311-sdk2.26.0-ubuntu22.04 | | [PyTorch 2.7.0](https://github.com/aws-neuron/deep-learning-containers/blob/2.25.0/docker/pytorch/inference/2.7.0/Dockerfile.neuronx) | torch-neuronx, transformers-neuronx, neuronx_distributed, neuronx_distributed_inference | Neuron 2.25.0 | inference | trn1,trn2,inf2 | 3.10 (py310) | 763104351884.dkr.ecr.us-west-2.amazonaws.com/pytorch-inference-neuronx:2.7.0-neuronx-py310-sdk2.25.0-ubuntu22.04 | | [PyTorch 2.7.0](https://github.com/aws-neuron/deep-learning-containers/blob/2.25.0/docker/pytorch/training/2.7.0/Dockerfile.neuronx) | torch-neuronx, transformers-neuronx, neuronx_distributed, neuronx_distributed_training | Neuron 2.25.0 | training | trn1,trn2,inf2 | 3.10 (py310) | 763104351884.dkr.ecr.us-west-2.amazonaws.com/pytorch-training-neuronx:2.7.0-neuronx-py310-sdk2.25.0-ubuntu22.04 | | [PyTorch 2.7.0](https://github.com/aws-neuron/deep-learning-containers/blob/2.24.1/docker/pytorch/inference/2.7.0/Dockerfile.neuronx) | torch-neuronx, transformers-neuronx, neuronx_distributed, neuronx_distributed_inference | Neuron 2.24.1 | inference | trn1,trn2,inf2 | 3.10 (py310) | 763104351884.dkr.ecr.us-west-2.amazonaws.com/pytorch-inference-neuronx:2.7.0-neuronx-py310-sdk2.24.1-ubuntu22.04 | From 8ce0f88b6a628465727feaa00f93fa3d03c3c9b8 Mon Sep 17 00:00:00 2001 From: Jyothirmai Kottu Date: Mon, 22 Sep 2025 16:46:43 -0700 Subject: [PATCH 34/49] Rebuild pytorch sm arm64 2.6 and pytorch sm arm64 2.5 image (#5261) * rebuild pytorch sm arm64 2.6 image * revert toml * Rebuild pt 2.5 * Rebuild pt 2.6 * rebuild and test pt2.5 * rebuild and test pt2.5 arm64 ec2 * rebuild and test pt2.5 arm64 ec2 * test --cache error * test --cache error * fix --cache problem * fix --cache problem * fix --cache problem * revert toml and dlc py scans * remove py scan files --- .../Dockerfile.sagemaker.arm64.cpu.py_scan_allowlist.json | 6 +++++- pytorch/inference/docker/2.6/py3/Dockerfile.arm64.cpu | 2 +- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/pytorch/inference/docker/2.5/py3/Dockerfile.sagemaker.arm64.cpu.py_scan_allowlist.json b/pytorch/inference/docker/2.5/py3/Dockerfile.sagemaker.arm64.cpu.py_scan_allowlist.json index 6603ab58714e..dbb2e5757650 100644 --- a/pytorch/inference/docker/2.5/py3/Dockerfile.sagemaker.arm64.cpu.py_scan_allowlist.json +++ b/pytorch/inference/docker/2.5/py3/Dockerfile.sagemaker.arm64.cpu.py_scan_allowlist.json @@ -1,3 +1,7 @@ { - "70612": "In Jinja2, the from_string function is prone to Server Side Template Injection (SSTI) where it takes the \"source\" parameter as a template object, renders it, and then returns it. The attacker can exploit it with {{INJECTION COMMANDS}} in a URI. \r\nNOTE: The maintainer and multiple third parties believe that this vulnerability isn't valid because users shouldn't use untrusted templates without sandboxing." + "70612": "In Jinja2, the from_string function is prone to Server Side Template Injection (SSTI) where it takes the \"source\" parameter as a template object, renders it, and then returns it. The attacker can exploit it with {{INJECTION COMMANDS}} in a URI. \r\nNOTE: The maintainer and multiple third parties believe that this vulnerability isn't valid because users shouldn't use untrusted templates without sandboxing.", + "79077": "Affected versions of the h2 package are vulnerable to HTTP Request Smuggling due to improper validation of illegal characters in HTTP headers. The package allows CRLF characters to be injected into header names and values without proper sanitisation, which can cause request boundary manipulation when HTTP/2 requests are downgraded to HTTP/1.1 by downstream servers.", + "78828": "Affected versions of the PyTorch package are vulnerable to Denial of Service (DoS) due to improper handling in the MKLDNN pooling implementation. The torch.mkldnn_max_pool2d function fails to properly validate input parameters, allowing crafted inputs to trigger resource exhaustion or crashes in the underlying MKLDNN library. An attacker with local access can exploit this vulnerability by passing specially crafted tensor dimensions or parameters to the max pooling function, causing the application to become unresponsive or crash.", + "77744": "urllib3 is a user-friendly HTTP client library for Python. Prior to 2.5.0, it is possible to disable redirects for all requests by instantiating a PoolManager and specifying retries in a way that disable redirects. By default, requests and botocore users are not affected. An application attempting to mitigate SSRF or open redirect vulnerabilities by disabling redirects at the PoolManager level will remain vulnerable. This issue has been patched in version 2.5.0.", + "77745": "Urllib3 is a user-friendly HTTP client library for Python. Starting in version 2.2.0 and before 2.5.0, urllib3 does not control redirects in browsers and Node.js. urllib3 supports being used in a Pyodide runtime, utilizing the JavaScript Fetch API or falling back on XMLHttpRequest. This means Python libraries can be used to make HTTP requests from a browser or Node.js. Additionally, urllib3 provides a mechanism to control redirects, but the retries and redirect parameters are ignored with Pyodide; the runtime itself determines redirect behaviour. This issue has been patched in version 2.5.0." } diff --git a/pytorch/inference/docker/2.6/py3/Dockerfile.arm64.cpu b/pytorch/inference/docker/2.6/py3/Dockerfile.arm64.cpu index 97b377669b02..3281081505e6 100644 --- a/pytorch/inference/docker/2.6/py3/Dockerfile.arm64.cpu +++ b/pytorch/inference/docker/2.6/py3/Dockerfile.arm64.cpu @@ -189,8 +189,8 @@ RUN chmod +x /usr/local/bin/dockerd-entrypoint.py # add telemetry COPY deep_learning_container.py /usr/local/bin/deep_learning_container.py -COPY sitecustomize.py /usr/local/lib/${PYTHON_SHORT_VERSION}/sitecustomize.py RUN chmod +x /usr/local/bin/deep_learning_container.py +# COPY sitecustomize.py /usr/local/lib/${PYTHON_SHORT_VERSION}/sitecustomize.py RUN HOME_DIR=/root \ && curl -o ${HOME_DIR}/oss_compliance.zip https://aws-dlinfra-utilities.s3.amazonaws.com/oss_compliance.zip \ From 6d3a49bddc0fdf162f5d836fc5ce688362b05a75 Mon Sep 17 00:00:00 2001 From: Jyothirmai Kottu Date: Thu, 25 Sep 2025 15:03:05 -0700 Subject: [PATCH 35/49] Patch tf2.18 SM image (#5299) * patch tf training 2.18 ec2 * patch tf training 2.18 sm * autopatch tf 2.18 sm * autopatch tf 2.18 sm * add docker login * add docker login * retest tensorflow 2.18 * build tf 2.18 * build tf2.18 * revert toml --- .../docker/2.18/py3/Dockerfile.sagemaker.cpu.core_packages.json | 2 +- .../2.18/py3/cu125/Dockerfile.sagemaker.gpu.core_packages.json | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/training/docker/2.18/py3/Dockerfile.sagemaker.cpu.core_packages.json b/tensorflow/training/docker/2.18/py3/Dockerfile.sagemaker.cpu.core_packages.json index 394e26d53027..e6519149d65d 100644 --- a/tensorflow/training/docker/2.18/py3/Dockerfile.sagemaker.cpu.core_packages.json +++ b/tensorflow/training/docker/2.18/py3/Dockerfile.sagemaker.cpu.core_packages.json @@ -18,7 +18,7 @@ "version_specifier": "==20.4.1" }, "sagemaker-training": { - "version_specifier": ">=4.8.3" + "version_specifier": "==4.8.4" }, "sagemaker-studio-analytics-extension": { "version_specifier": "<1" diff --git a/tensorflow/training/docker/2.18/py3/cu125/Dockerfile.sagemaker.gpu.core_packages.json b/tensorflow/training/docker/2.18/py3/cu125/Dockerfile.sagemaker.gpu.core_packages.json index c37b0a7bca6c..1fed8908ef42 100644 --- a/tensorflow/training/docker/2.18/py3/cu125/Dockerfile.sagemaker.gpu.core_packages.json +++ b/tensorflow/training/docker/2.18/py3/cu125/Dockerfile.sagemaker.gpu.core_packages.json @@ -18,7 +18,7 @@ "version_specifier": "==20.4.1" }, "sagemaker-training": { - "version_specifier": ">=4.8.3" + "version_specifier": "==4.8.4" }, "sagemaker-studio-analytics-extension": { "version_specifier": "<1" From e191f29d49daedf5ab4a30f81131781bcf5c4fb1 Mon Sep 17 00:00:00 2001 From: Jinyan Li <97153458+jinyan-li1@users.noreply.github.com> Date: Fri, 26 Sep 2025 10:17:54 -0700 Subject: [PATCH 36/49] patch pytorch 2.5 inference ARM64 sagemaker image (#5298) * patch pytorch 2.5 inference ARM64 sagemaker image --- ...rfile.ec2.arm64.cpu.os_scan_allowlist.json | 2 +- ...sagemaker.arm64.cpu.os_scan_allowlist.json | 62 +++++++++++++++++++ 2 files changed, 63 insertions(+), 1 deletion(-) diff --git a/pytorch/inference/docker/2.5/py3/Dockerfile.ec2.arm64.cpu.os_scan_allowlist.json b/pytorch/inference/docker/2.5/py3/Dockerfile.ec2.arm64.cpu.os_scan_allowlist.json index 77fa6e34a08d..55cd3b721fff 100644 --- a/pytorch/inference/docker/2.5/py3/Dockerfile.ec2.arm64.cpu.os_scan_allowlist.json +++ b/pytorch/inference/docker/2.5/py3/Dockerfile.ec2.arm64.cpu.os_scan_allowlist.json @@ -30,4 +30,4 @@ "reason_to_ignore": "N/A" } ] -} +} \ No newline at end of file diff --git a/pytorch/inference/docker/2.5/py3/Dockerfile.sagemaker.arm64.cpu.os_scan_allowlist.json b/pytorch/inference/docker/2.5/py3/Dockerfile.sagemaker.arm64.cpu.os_scan_allowlist.json index 77fa6e34a08d..4527b78d7f1b 100644 --- a/pytorch/inference/docker/2.5/py3/Dockerfile.sagemaker.arm64.cpu.os_scan_allowlist.json +++ b/pytorch/inference/docker/2.5/py3/Dockerfile.sagemaker.arm64.cpu.os_scan_allowlist.json @@ -29,5 +29,67 @@ "title": "CVE-2025-32434 - torch", "reason_to_ignore": "N/A" } + ], + "dpkg": [ + { + "description": "It was discovered that dpkg-deb does not properly sanitize directory permissions when extracting a control member into a temporary directory, which is documented as being a safe operation even on untrusted data. This may result in leaving temporary files behind on cleanup. Given automated and repeated execution of dpkg-deb commands on adversarial .deb packages or with well compressible files, placed inside a directory with permissions not allowing removal by a non-root user, this can end up in a DoS scenario due to causing disk quota exhaustion or disk full conditions.", + "vulnerability_id": "CVE-2025-6297", + "name": "CVE-2025-6297", + "package_name": "dpkg", + "package_details": { + "file_path": null, + "name": "dpkg", + "package_manager": "OS", + "version": "1.21.1ubuntu2.3", + "release": null + }, + "remediation": { + "recommendation": { + "text": "None Provided" + } + }, + "cvss_v3_score": 8.2, + "cvss_v30_score": 0.0, + "cvss_v31_score": 8.2, + "cvss_v2_score": 0.0, + "cvss_v3_severity": "HIGH", + "source_url": "https://people.canonical.com/~ubuntu-security/cve/2025/CVE-2025-6297.html", + "source": "UBUNTU_CVE", + "severity": "HIGH", + "status": "ACTIVE", + "title": "CVE-2025-6297 - dpkg, libdpkg-perl", + "reason_to_ignore": "N/A" + } + ], + "libdpkg-perl": [ + { + "description": "It was discovered that dpkg-deb does not properly sanitize directory permissions when extracting a control member into a temporary directory, which is documented as being a safe operation even on untrusted data. This may result in leaving temporary files behind on cleanup. Given automated and repeated execution of dpkg-deb commands on adversarial .deb packages or with well compressible files, placed inside a directory with permissions not allowing removal by a non-root user, this can end up in a DoS scenario due to causing disk quota exhaustion or disk full conditions.", + "vulnerability_id": "CVE-2025-6297", + "name": "CVE-2025-6297", + "package_name": "libdpkg-perl", + "package_details": { + "file_path": null, + "name": "libdpkg-perl", + "package_manager": "OS", + "version": "1.21.1ubuntu2.3", + "release": null + }, + "remediation": { + "recommendation": { + "text": "None Provided" + } + }, + "cvss_v3_score": 8.2, + "cvss_v30_score": 0.0, + "cvss_v31_score": 8.2, + "cvss_v2_score": 0.0, + "cvss_v3_severity": "HIGH", + "source_url": "https://people.canonical.com/~ubuntu-security/cve/2025/CVE-2025-6297.html", + "source": "UBUNTU_CVE", + "severity": "HIGH", + "status": "ACTIVE", + "title": "CVE-2025-6297 - dpkg, libdpkg-perl", + "reason_to_ignore": "N/A" + } ] } From b13baec436b73f223bf2176c4cb62bab32948058 Mon Sep 17 00:00:00 2001 From: Jinyan Li <97153458+jinyan-li1@users.noreply.github.com> Date: Fri, 26 Sep 2025 12:14:43 -0700 Subject: [PATCH 37/49] PT 2.5 SM ARM - Comment out latest_release_tag in buildspec (#5305) --- pytorch/inference/buildspec-arm64-2-5-sm.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pytorch/inference/buildspec-arm64-2-5-sm.yml b/pytorch/inference/buildspec-arm64-2-5-sm.yml index ec5376d5b14d..35b89e3d6421 100644 --- a/pytorch/inference/buildspec-arm64-2-5-sm.yml +++ b/pytorch/inference/buildspec-arm64-2-5-sm.yml @@ -43,7 +43,7 @@ images: torch_serve_version: &TORCHSERVE_VERSION 0.12.0 tool_kit_version: &SM_TOOLKIT_VERSION 2.0.25 tag: !join [ *VERSION, "-", *DEVICE_TYPE, "-", *TAG_PYTHON_VERSION, "-", *OS_VERSION , "-sagemaker"] - latest_release_tag: !join [ *VERSION, "-", *DEVICE_TYPE, "-", *TAG_PYTHON_VERSION, "-", *OS_VERSION, "-sagemaker" ] + # latest_release_tag: !join [ *VERSION, "-", *DEVICE_TYPE, "-", *TAG_PYTHON_VERSION, "-", *OS_VERSION, "-sagemaker" ] # skip_build: "False" docker_file: !join [ docker/, *SHORT_VERSION, /, *DOCKER_PYTHON_VERSION, /Dockerfile.arm64., *DEVICE_TYPE ] target: sagemaker From 7984ec588d181407c9d45550b53dc638e8a6174e Mon Sep 17 00:00:00 2001 From: Jyothirmai Kottu Date: Fri, 26 Sep 2025 14:20:42 -0700 Subject: [PATCH 38/49] Patch TF2.18 inference arm64 (#5306) * test tf 2.18 inference amr64 sm * test tf 2.18 inference amr64 ec2 * test tf 2.18 inference amr64 sm * revert toml --- .../docker/2.18/py3/Dockerfile.arm64.cpu | 3 +- ...ockerfile.ec2.arm64.cpu.core_packages.json | 2 +- ...rfile.ec2.arm64.cpu.os_scan_allowlist.json | 176 ++++++++++++++++++ ...ile.sagemaker.arm64.cpu.core_packages.json | 2 +- ...sagemaker.arm64.cpu.os_scan_allowlist.json | 176 ++++++++++++++++++ 5 files changed, 355 insertions(+), 4 deletions(-) diff --git a/tensorflow/inference/docker/2.18/py3/Dockerfile.arm64.cpu b/tensorflow/inference/docker/2.18/py3/Dockerfile.arm64.cpu index 40242dda1913..338e0574fb99 100644 --- a/tensorflow/inference/docker/2.18/py3/Dockerfile.arm64.cpu +++ b/tensorflow/inference/docker/2.18/py3/Dockerfile.arm64.cpu @@ -92,8 +92,7 @@ RUN ${PIP} install --no-cache-dir \ gevent \ requests \ grpcio \ - # protobuf version requirements in https://github.com/tensorflow/serving/blob/master/tensorflow_serving/tools/pip_package/setup.py#L66 - "protobuf<5.0" \ + "protobuf==5.29.5" \ packaging \ # using --no-dependencies to avoid installing tensorflow binary && ${PIP} install --no-dependencies --no-cache-dir \ diff --git a/tensorflow/inference/docker/2.18/py3/Dockerfile.ec2.arm64.cpu.core_packages.json b/tensorflow/inference/docker/2.18/py3/Dockerfile.ec2.arm64.cpu.core_packages.json index e19025e77c9d..6334c2259281 100644 --- a/tensorflow/inference/docker/2.18/py3/Dockerfile.ec2.arm64.cpu.core_packages.json +++ b/tensorflow/inference/docker/2.18/py3/Dockerfile.ec2.arm64.cpu.core_packages.json @@ -13,6 +13,6 @@ "version_specifier":">=1.24.3,<2.0" }, "protobuf":{ - "version_specifier":">=3.20.3,<6.0.0dev,!=4.21.5,!=4.21.4,!=4.21.3,!=4.21.2,!=4.21.1,!=4.21.0" + "version_specifier":">=5.29.5" } } \ No newline at end of file diff --git a/tensorflow/inference/docker/2.18/py3/Dockerfile.ec2.arm64.cpu.os_scan_allowlist.json b/tensorflow/inference/docker/2.18/py3/Dockerfile.ec2.arm64.cpu.os_scan_allowlist.json index 60e3d94d5c7b..83559f188a63 100644 --- a/tensorflow/inference/docker/2.18/py3/Dockerfile.ec2.arm64.cpu.os_scan_allowlist.json +++ b/tensorflow/inference/docker/2.18/py3/Dockerfile.ec2.arm64.cpu.os_scan_allowlist.json @@ -379,5 +379,181 @@ "title": "CVE-2022-48337 - emacs, emacs-common and 1 more", "reason_to_ignore": "N/A" } + ], + "linux-libc-dev": [ + { + "description": "In the Linux kernel, the following vulnerability has been resolved: memstick: rtsx_usb_ms: Fix slab-use-after-free in rtsx_usb_ms_drv_remove This fixes the following crash: ================================================================== BUG: KASAN: slab-use-after-free in rtsx_usb_ms_poll_card+0x159/0x200 [rtsx_usb_ms] Read of size 8 at addr ffff888136335380 by task *******/6:0/140241 CPU: 6 UID: 0 PID: 140241 Comm: *******/6:0 Kdump: loaded Tainted: G E 6.14.0-rc6+ #1 Tainted: [E]=UNSIGNED_MODULE Hardware name: LENOVO 30FNA1V7CW/1057, BIOS S0EKT54A 07/01/2024 Workqueue: events rtsx_usb_ms_poll_card [rtsx_usb_ms] Call Trace: dump_stack_lvl+0x51/0x70 print_address_description.constprop.0+0x27/0x320 ? rtsx_usb_ms_poll_card+0x159/0x200 [rtsx_usb_ms] print_report+0x3e/0x70 kasan_report+0xab/0xe0 ? rtsx_usb_ms_poll_card+0x159/0x200 [rtsx_usb_ms] rtsx_usb_ms_poll_card+0x159/0x200 [rtsx_usb_ms] ? __pfx_rtsx_usb_ms_poll_card+0x10/0x10 [rtsx_usb_ms] ? __pfx___schedule+0x10/0x10 ? kick_pool+0x3b/0x270 process_", + "vulnerability_id": "CVE-2025-22020", + "name": "CVE-2025-22020", + "package_name": "linux-libc-dev", + "package_details": { + "file_path": null, + "name": "linux-libc-dev", + "package_manager": "OS", + "version": "5.4.0", + "release": "216.236" + }, + "remediation": { + "recommendation": { + "text": "None Provided" + } + }, + "cvss_v3_score": 7.8, + "cvss_v30_score": 0.0, + "cvss_v31_score": 7.8, + "cvss_v2_score": 0.0, + "cvss_v3_severity": "HIGH", + "source_url": "https://people.canonical.com/~ubuntu-security/cve/2025/CVE-2025-22020.html", + "source": "UBUNTU_CVE", + "severity": "HIGH", + "status": "ACTIVE", + "title": "CVE-2025-22020 - linux-libc-dev", + "reason_to_ignore": "N/A" + }, + { + "description": "In the Linux kernel, the following vulnerability has been resolved: drm/amdgpu: fix usage slab after free [ +0.000021] BUG: KASAN: slab-use-after-free in drm_sched_entity_flush+0x6cb/0x7a0 [gpu_sched] [ +0.000027] Read of size 8 at addr ffff8881b8605f88 by task amd_pci_unplug/2147 [ +0.000023] CPU: 6 PID: 2147 Comm: amd_pci_unplug Not tainted 6.10.0+ #1 [ +0.000016] Hardware name: ASUS System Product Name/ROG STRIX B550-F GAMING (WI-FI), BIOS 1401 12/03/2020 [ +0.000016] Call Trace: [ +0.000008] [ +0.000009] dump_stack_lvl+0x76/0xa0 [ +0.000017] print_report+0xce/0x5f0 [ +0.000017] ? drm_sched_entity_flush+0x6cb/0x7a0 [gpu_sched] [ +0.000019] ? srso_return_thunk+0x5/0x5f [ +0.000015] ? kasan_complete_mode_report_info+0x72/0x200 [ +0.000016] ? drm_sched_entity_flush+0x6cb/0x7a0 [gpu_sched] [ +0.000019] kasan_report+0xbe/0x110 [ +0.000015] ? drm_sched_entity_flush+0x6cb/0x7a0 [gpu_sched] [ +0.000023] __asan_report_load8_noabort+0x14/0x30 [ +0.000014] drm_sched_entity_flush+0x6cb/0x7a0 [gpu_sched] [ +0.00", + "vulnerability_id": "CVE-2024-56551", + "name": "CVE-2024-56551", + "package_name": "linux-libc-dev", + "package_details": { + "file_path": null, + "name": "linux-libc-dev", + "package_manager": "OS", + "version": "5.4.0", + "release": "216.236" + }, + "remediation": { + "recommendation": { + "text": "None Provided" + } + }, + "cvss_v3_score": 7.8, + "cvss_v30_score": 0.0, + "cvss_v31_score": 7.8, + "cvss_v2_score": 0.0, + "cvss_v3_severity": "HIGH", + "source_url": "https://people.canonical.com/~ubuntu-security/cve/2024/CVE-2024-56551.html", + "source": "UBUNTU_CVE", + "severity": "HIGH", + "status": "ACTIVE", + "title": "CVE-2024-56551 - linux-libc-dev", + "reason_to_ignore": "N/A" + }, + { + "description": "In the Linux kernel, the following vulnerability has been resolved: of: module: add buffer overflow check in of_modalias() In of_modalias(), if the buffer happens to be too small even for the 1st snprintf() call, the len parameter will become negative and str parameter (if not NULL initially) will point beyond the buffer's end. Add the buffer overflow check after the 1st snprintf() call and fix such check after the strlen() call (accounting for the terminating NUL char).", + "vulnerability_id": "CVE-2024-38541", + "name": "CVE-2024-38541", + "package_name": "linux-libc-dev", + "package_details": { + "file_path": null, + "name": "linux-libc-dev", + "package_manager": "OS", + "version": "5.4.0", + "release": "216.236" + }, + "remediation": { + "recommendation": { + "text": "None Provided" + } + }, + "cvss_v3_score": 9.8, + "cvss_v30_score": 0.0, + "cvss_v31_score": 9.8, + "cvss_v2_score": 0.0, + "cvss_v3_severity": "CRITICAL", + "source_url": "https://people.canonical.com/~ubuntu-security/cve/2024/CVE-2024-38541.html", + "source": "UBUNTU_CVE", + "severity": "CRITICAL", + "status": "ACTIVE", + "title": "CVE-2024-38541 - linux-libc-dev", + "reason_to_ignore": "N/A" + }, + { + "description": "In the Linux kernel, the following vulnerability has been resolved: net: atm: fix use after free in lec_send() The ->send() operation frees skb so save the length before calling ->send() to avoid a use after free.", + "vulnerability_id": "CVE-2025-22004", + "name": "CVE-2025-22004", + "package_name": "linux-libc-dev", + "package_details": { + "file_path": null, + "name": "linux-libc-dev", + "package_manager": "OS", + "version": "5.4.0", + "release": "216.236" + }, + "remediation": { + "recommendation": { + "text": "None Provided" + } + }, + "cvss_v3_score": 7.8, + "cvss_v30_score": 0.0, + "cvss_v31_score": 7.8, + "cvss_v2_score": 0.0, + "cvss_v3_severity": "HIGH", + "source_url": "https://people.canonical.com/~ubuntu-security/cve/2025/CVE-2025-22004.html", + "source": "UBUNTU_CVE", + "severity": "HIGH", + "status": "ACTIVE", + "title": "CVE-2025-22004 - linux-libc-dev", + "reason_to_ignore": "N/A" + }, + { + "description": "In the Linux kernel, the following vulnerability has been resolved: scsi: iscsi_tcp: Fix UAF during logout when accessing the shost ipaddress Bug report and analysis from Ding Hui. During iSCSI session logout, if another task accesses the shost ipaddress attr, we can get a KASAN UAF report like this: [ 276.942144] BUG: KASAN: use-after-free in _raw_spin_lock_bh+0x78/0xe0 [ 276.942535] Write of size 4 at addr ffff8881053b45b8 by task cat/4088 [ 276.943511] CPU: 2 PID: 4088 Comm: cat Tainted: G E 6.1.0-rc8+ #3 [ 276.943997] Hardware name: VMware, Inc. VMware Virtual Platform/440BX Desktop Reference Platform, BIOS 6.00 11/12/2020 [ 276.944470] Call Trace: [ 276.944943] [ 276.945397] dump_stack_lvl+0x34/0x48 [ 276.945887] print_address_description.constprop.0+0x86/0x1e7 [ 276.946421] print_report+0x36/0x4f [ 276.947358] kasan_report+0xad/0x130 [ 276.948234] kasan_check_range+0x35/0x1c0 [ 276.948674] _raw_spin_lock_bh+0x78/0xe0 [ 276.949989] iscsi_sw_tcp_host_get_param+0xad/0x2e0 [iscsi_tcp] [ 276.951765] s", + "vulnerability_id": "CVE-2023-52975", + "name": "CVE-2023-52975", + "package_name": "linux-libc-dev", + "package_details": { + "file_path": null, + "name": "linux-libc-dev", + "package_manager": "OS", + "version": "5.4.0", + "release": "216.236" + }, + "remediation": { + "recommendation": { + "text": "None Provided" + } + }, + "cvss_v3_score": 7.8, + "cvss_v30_score": 0.0, + "cvss_v31_score": 7.8, + "cvss_v2_score": 0.0, + "cvss_v3_severity": "HIGH", + "source_url": "https://people.canonical.com/~ubuntu-security/cve/2023/CVE-2023-52975.html", + "source": "UBUNTU_CVE", + "severity": "HIGH", + "status": "ACTIVE", + "title": "CVE-2023-52975 - linux-libc-dev", + "reason_to_ignore": "N/A" + }, + { + "description": "In the Linux kernel, the following vulnerability has been resolved: tty: n_gsm: Fix use-after-free in gsm_cleanup_mux BUG: KASAN: slab-use-after-free in gsm_cleanup_mux+0x77b/0x7b0 drivers/tty/n_gsm.c:3160 [n_gsm] Read of size 8 at addr ffff88815fe99c00 by task poc/3379 CPU: 0 UID: 0 PID: 3379 Comm: poc Not tainted 6.11.0+ #56 Hardware name: VMware, Inc. VMware Virtual Platform/440BX Desktop Reference Platform, BIOS 6.00 11/12/2020 Call Trace: gsm_cleanup_mux+0x77b/0x7b0 drivers/tty/n_gsm.c:3160 [n_gsm] __pfx_gsm_cleanup_mux+0x10/0x10 drivers/tty/n_gsm.c:3124 [n_gsm] __pfx_sched_clock_cpu+0x10/0x10 kernel/sched/clock.c:389 update_load_avg+0x1c1/0x27b0 kernel/sched/fair.c:4500 __pfx_min_vruntime_cb_rotate+0x10/0x10 kernel/sched/fair.c:846 __rb_insert_augmented+0x492/0xbf0 lib/rbtree.c:161 gsmld_ioctl+0x395/0x1450 drivers/tty/n_gsm.c:3408 [n_gsm] _raw_spin_lock_irqsave+0x92/0xf0 arch/x86/include/asm/atomic.h:107 __pfx_gsmld_ioctl+0x10/0x10 drivers/tty/n_gsm.c:3822 [n_gsm] ktime_get+0x5e/0x140 kernel/time", + "vulnerability_id": "CVE-2024-50073", + "name": "CVE-2024-50073", + "package_name": "linux-libc-dev", + "package_details": { + "file_path": null, + "name": "linux-libc-dev", + "package_manager": "OS", + "version": "5.4.0", + "release": "216.236" + }, + "remediation": { + "recommendation": { + "text": "None Provided" + } + }, + "cvss_v3_score": 7.8, + "cvss_v30_score": 0.0, + "cvss_v31_score": 7.8, + "cvss_v2_score": 0.0, + "cvss_v3_severity": "HIGH", + "source_url": "https://people.canonical.com/~ubuntu-security/cve/2024/CVE-2024-50073.html", + "source": "UBUNTU_CVE", + "severity": "HIGH", + "status": "ACTIVE", + "title": "CVE-2024-50073 - linux-libc-dev", + "reason_to_ignore": "N/A" + } ] } diff --git a/tensorflow/inference/docker/2.18/py3/Dockerfile.sagemaker.arm64.cpu.core_packages.json b/tensorflow/inference/docker/2.18/py3/Dockerfile.sagemaker.arm64.cpu.core_packages.json index 4d4f07579a79..dc23a47b20d2 100644 --- a/tensorflow/inference/docker/2.18/py3/Dockerfile.sagemaker.arm64.cpu.core_packages.json +++ b/tensorflow/inference/docker/2.18/py3/Dockerfile.sagemaker.arm64.cpu.core_packages.json @@ -13,7 +13,7 @@ "version_specifier":">=1.24.3,<2.0" }, "protobuf":{ - "version_specifier":">=3.20.3,<6.0.0dev,!=4.21.5,!=4.21.4,!=4.21.3,!=4.21.2,!=4.21.1,!=4.21.0" + "version_specifier":"==5.29.5" }, "falcon":{ "version_specifier":"==3.1.0" diff --git a/tensorflow/inference/docker/2.18/py3/Dockerfile.sagemaker.arm64.cpu.os_scan_allowlist.json b/tensorflow/inference/docker/2.18/py3/Dockerfile.sagemaker.arm64.cpu.os_scan_allowlist.json index 8cd50f3a3742..b75e3016e8c8 100644 --- a/tensorflow/inference/docker/2.18/py3/Dockerfile.sagemaker.arm64.cpu.os_scan_allowlist.json +++ b/tensorflow/inference/docker/2.18/py3/Dockerfile.sagemaker.arm64.cpu.os_scan_allowlist.json @@ -379,5 +379,181 @@ "title": "CVE-2023-28617 - emacs, emacs-common and 1 more", "reason_to_ignore": "N/A" } + ], + "linux-libc-dev": [ + { + "description": "In the Linux kernel, the following vulnerability has been resolved: memstick: rtsx_usb_ms: Fix slab-use-after-free in rtsx_usb_ms_drv_remove This fixes the following crash: ================================================================== BUG: KASAN: slab-use-after-free in rtsx_usb_ms_poll_card+0x159/0x200 [rtsx_usb_ms] Read of size 8 at addr ffff888136335380 by task *******/6:0/140241 CPU: 6 UID: 0 PID: 140241 Comm: *******/6:0 Kdump: loaded Tainted: G E 6.14.0-rc6+ #1 Tainted: [E]=UNSIGNED_MODULE Hardware name: LENOVO 30FNA1V7CW/1057, BIOS S0EKT54A 07/01/2024 Workqueue: events rtsx_usb_ms_poll_card [rtsx_usb_ms] Call Trace: dump_stack_lvl+0x51/0x70 print_address_description.constprop.0+0x27/0x320 ? rtsx_usb_ms_poll_card+0x159/0x200 [rtsx_usb_ms] print_report+0x3e/0x70 kasan_report+0xab/0xe0 ? rtsx_usb_ms_poll_card+0x159/0x200 [rtsx_usb_ms] rtsx_usb_ms_poll_card+0x159/0x200 [rtsx_usb_ms] ? __pfx_rtsx_usb_ms_poll_card+0x10/0x10 [rtsx_usb_ms] ? __pfx___schedule+0x10/0x10 ? kick_pool+0x3b/0x270 process_", + "vulnerability_id": "CVE-2025-22020", + "name": "CVE-2025-22020", + "package_name": "linux-libc-dev", + "package_details": { + "file_path": null, + "name": "linux-libc-dev", + "package_manager": "OS", + "version": "5.4.0", + "release": "216.236" + }, + "remediation": { + "recommendation": { + "text": "None Provided" + } + }, + "cvss_v3_score": 7.8, + "cvss_v30_score": 0.0, + "cvss_v31_score": 7.8, + "cvss_v2_score": 0.0, + "cvss_v3_severity": "HIGH", + "source_url": "https://people.canonical.com/~ubuntu-security/cve/2025/CVE-2025-22020.html", + "source": "UBUNTU_CVE", + "severity": "HIGH", + "status": "ACTIVE", + "title": "CVE-2025-22020 - linux-libc-dev", + "reason_to_ignore": "N/A" + }, + { + "description": "In the Linux kernel, the following vulnerability has been resolved: drm/amdgpu: fix usage slab after free [ +0.000021] BUG: KASAN: slab-use-after-free in drm_sched_entity_flush+0x6cb/0x7a0 [gpu_sched] [ +0.000027] Read of size 8 at addr ffff8881b8605f88 by task amd_pci_unplug/2147 [ +0.000023] CPU: 6 PID: 2147 Comm: amd_pci_unplug Not tainted 6.10.0+ #1 [ +0.000016] Hardware name: ASUS System Product Name/ROG STRIX B550-F GAMING (WI-FI), BIOS 1401 12/03/2020 [ +0.000016] Call Trace: [ +0.000008] [ +0.000009] dump_stack_lvl+0x76/0xa0 [ +0.000017] print_report+0xce/0x5f0 [ +0.000017] ? drm_sched_entity_flush+0x6cb/0x7a0 [gpu_sched] [ +0.000019] ? srso_return_thunk+0x5/0x5f [ +0.000015] ? kasan_complete_mode_report_info+0x72/0x200 [ +0.000016] ? drm_sched_entity_flush+0x6cb/0x7a0 [gpu_sched] [ +0.000019] kasan_report+0xbe/0x110 [ +0.000015] ? drm_sched_entity_flush+0x6cb/0x7a0 [gpu_sched] [ +0.000023] __asan_report_load8_noabort+0x14/0x30 [ +0.000014] drm_sched_entity_flush+0x6cb/0x7a0 [gpu_sched] [ +0.00", + "vulnerability_id": "CVE-2024-56551", + "name": "CVE-2024-56551", + "package_name": "linux-libc-dev", + "package_details": { + "file_path": null, + "name": "linux-libc-dev", + "package_manager": "OS", + "version": "5.4.0", + "release": "216.236" + }, + "remediation": { + "recommendation": { + "text": "None Provided" + } + }, + "cvss_v3_score": 7.8, + "cvss_v30_score": 0.0, + "cvss_v31_score": 7.8, + "cvss_v2_score": 0.0, + "cvss_v3_severity": "HIGH", + "source_url": "https://people.canonical.com/~ubuntu-security/cve/2024/CVE-2024-56551.html", + "source": "UBUNTU_CVE", + "severity": "HIGH", + "status": "ACTIVE", + "title": "CVE-2024-56551 - linux-libc-dev", + "reason_to_ignore": "N/A" + }, + { + "description": "In the Linux kernel, the following vulnerability has been resolved: of: module: add buffer overflow check in of_modalias() In of_modalias(), if the buffer happens to be too small even for the 1st snprintf() call, the len parameter will become negative and str parameter (if not NULL initially) will point beyond the buffer's end. Add the buffer overflow check after the 1st snprintf() call and fix such check after the strlen() call (accounting for the terminating NUL char).", + "vulnerability_id": "CVE-2024-38541", + "name": "CVE-2024-38541", + "package_name": "linux-libc-dev", + "package_details": { + "file_path": null, + "name": "linux-libc-dev", + "package_manager": "OS", + "version": "5.4.0", + "release": "216.236" + }, + "remediation": { + "recommendation": { + "text": "None Provided" + } + }, + "cvss_v3_score": 9.8, + "cvss_v30_score": 0.0, + "cvss_v31_score": 9.8, + "cvss_v2_score": 0.0, + "cvss_v3_severity": "CRITICAL", + "source_url": "https://people.canonical.com/~ubuntu-security/cve/2024/CVE-2024-38541.html", + "source": "UBUNTU_CVE", + "severity": "CRITICAL", + "status": "ACTIVE", + "title": "CVE-2024-38541 - linux-libc-dev", + "reason_to_ignore": "N/A" + }, + { + "description": "In the Linux kernel, the following vulnerability has been resolved: net: atm: fix use after free in lec_send() The ->send() operation frees skb so save the length before calling ->send() to avoid a use after free.", + "vulnerability_id": "CVE-2025-22004", + "name": "CVE-2025-22004", + "package_name": "linux-libc-dev", + "package_details": { + "file_path": null, + "name": "linux-libc-dev", + "package_manager": "OS", + "version": "5.4.0", + "release": "216.236" + }, + "remediation": { + "recommendation": { + "text": "None Provided" + } + }, + "cvss_v3_score": 7.8, + "cvss_v30_score": 0.0, + "cvss_v31_score": 7.8, + "cvss_v2_score": 0.0, + "cvss_v3_severity": "HIGH", + "source_url": "https://people.canonical.com/~ubuntu-security/cve/2025/CVE-2025-22004.html", + "source": "UBUNTU_CVE", + "severity": "HIGH", + "status": "ACTIVE", + "title": "CVE-2025-22004 - linux-libc-dev", + "reason_to_ignore": "N/A" + }, + { + "description": "In the Linux kernel, the following vulnerability has been resolved: scsi: iscsi_tcp: Fix UAF during logout when accessing the shost ipaddress Bug report and analysis from Ding Hui. During iSCSI session logout, if another task accesses the shost ipaddress attr, we can get a KASAN UAF report like this: [ 276.942144] BUG: KASAN: use-after-free in _raw_spin_lock_bh+0x78/0xe0 [ 276.942535] Write of size 4 at addr ffff8881053b45b8 by task cat/4088 [ 276.943511] CPU: 2 PID: 4088 Comm: cat Tainted: G E 6.1.0-rc8+ #3 [ 276.943997] Hardware name: VMware, Inc. VMware Virtual Platform/440BX Desktop Reference Platform, BIOS 6.00 11/12/2020 [ 276.944470] Call Trace: [ 276.944943] [ 276.945397] dump_stack_lvl+0x34/0x48 [ 276.945887] print_address_description.constprop.0+0x86/0x1e7 [ 276.946421] print_report+0x36/0x4f [ 276.947358] kasan_report+0xad/0x130 [ 276.948234] kasan_check_range+0x35/0x1c0 [ 276.948674] _raw_spin_lock_bh+0x78/0xe0 [ 276.949989] iscsi_sw_tcp_host_get_param+0xad/0x2e0 [iscsi_tcp] [ 276.951765] s", + "vulnerability_id": "CVE-2023-52975", + "name": "CVE-2023-52975", + "package_name": "linux-libc-dev", + "package_details": { + "file_path": null, + "name": "linux-libc-dev", + "package_manager": "OS", + "version": "5.4.0", + "release": "216.236" + }, + "remediation": { + "recommendation": { + "text": "None Provided" + } + }, + "cvss_v3_score": 7.8, + "cvss_v30_score": 0.0, + "cvss_v31_score": 7.8, + "cvss_v2_score": 0.0, + "cvss_v3_severity": "HIGH", + "source_url": "https://people.canonical.com/~ubuntu-security/cve/2023/CVE-2023-52975.html", + "source": "UBUNTU_CVE", + "severity": "HIGH", + "status": "ACTIVE", + "title": "CVE-2023-52975 - linux-libc-dev", + "reason_to_ignore": "N/A" + }, + { + "description": "In the Linux kernel, the following vulnerability has been resolved: tty: n_gsm: Fix use-after-free in gsm_cleanup_mux BUG: KASAN: slab-use-after-free in gsm_cleanup_mux+0x77b/0x7b0 drivers/tty/n_gsm.c:3160 [n_gsm] Read of size 8 at addr ffff88815fe99c00 by task poc/3379 CPU: 0 UID: 0 PID: 3379 Comm: poc Not tainted 6.11.0+ #56 Hardware name: VMware, Inc. VMware Virtual Platform/440BX Desktop Reference Platform, BIOS 6.00 11/12/2020 Call Trace: gsm_cleanup_mux+0x77b/0x7b0 drivers/tty/n_gsm.c:3160 [n_gsm] __pfx_gsm_cleanup_mux+0x10/0x10 drivers/tty/n_gsm.c:3124 [n_gsm] __pfx_sched_clock_cpu+0x10/0x10 kernel/sched/clock.c:389 update_load_avg+0x1c1/0x27b0 kernel/sched/fair.c:4500 __pfx_min_vruntime_cb_rotate+0x10/0x10 kernel/sched/fair.c:846 __rb_insert_augmented+0x492/0xbf0 lib/rbtree.c:161 gsmld_ioctl+0x395/0x1450 drivers/tty/n_gsm.c:3408 [n_gsm] _raw_spin_lock_irqsave+0x92/0xf0 arch/x86/include/asm/atomic.h:107 __pfx_gsmld_ioctl+0x10/0x10 drivers/tty/n_gsm.c:3822 [n_gsm] ktime_get+0x5e/0x140 kernel/time", + "vulnerability_id": "CVE-2024-50073", + "name": "CVE-2024-50073", + "package_name": "linux-libc-dev", + "package_details": { + "file_path": null, + "name": "linux-libc-dev", + "package_manager": "OS", + "version": "5.4.0", + "release": "216.236" + }, + "remediation": { + "recommendation": { + "text": "None Provided" + } + }, + "cvss_v3_score": 7.8, + "cvss_v30_score": 0.0, + "cvss_v31_score": 7.8, + "cvss_v2_score": 0.0, + "cvss_v3_severity": "HIGH", + "source_url": "https://people.canonical.com/~ubuntu-security/cve/2024/CVE-2024-50073.html", + "source": "UBUNTU_CVE", + "severity": "HIGH", + "status": "ACTIVE", + "title": "CVE-2024-50073 - linux-libc-dev", + "reason_to_ignore": "N/A" + } ] } From 9985c1ea1d79c3a527bb21f310e63740955b801b Mon Sep 17 00:00:00 2001 From: Jinyan Li <97153458+jinyan-li1@users.noreply.github.com> Date: Fri, 26 Sep 2025 14:41:04 -0700 Subject: [PATCH 39/49] Patch tensoflow 2.19 inference and training images (#5304) * Patch tensoflow 2.19 inference and training images --- ...sagemaker.arm64.cpu.os_scan_allowlist.json | 64 +++++++++++++++++++ ...rfile.sagemaker.gpu.os_scan_allowlist.json | 62 ++++++++++++++++++ ...rfile.sagemaker.cpu.os_scan_allowlist.json | 62 ++++++++++++++++++ ...rfile.sagemaker.gpu.os_scan_allowlist.json | 62 ++++++++++++++++++ 4 files changed, 250 insertions(+) create mode 100644 tensorflow/inference/docker/2.19/py3/Dockerfile.sagemaker.arm64.cpu.os_scan_allowlist.json diff --git a/tensorflow/inference/docker/2.19/py3/Dockerfile.sagemaker.arm64.cpu.os_scan_allowlist.json b/tensorflow/inference/docker/2.19/py3/Dockerfile.sagemaker.arm64.cpu.os_scan_allowlist.json new file mode 100644 index 000000000000..b1749544dbc9 --- /dev/null +++ b/tensorflow/inference/docker/2.19/py3/Dockerfile.sagemaker.arm64.cpu.os_scan_allowlist.json @@ -0,0 +1,64 @@ +{ + "dpkg": [ + { + "description": "It was discovered that dpkg-deb does not properly sanitize directory permissions when extracting a control member into a temporary directory, which is documented as being a safe operation even on untrusted data. This may result in leaving temporary files behind on cleanup. Given automated and repeated execution of dpkg-deb commands on adversarial .deb packages or with well compressible files, placed inside a directory with permissions not allowing removal by a non-root user, this can end up in a DoS scenario due to causing disk quota exhaustion or disk full conditions.", + "vulnerability_id": "CVE-2025-6297", + "name": "CVE-2025-6297", + "package_name": "dpkg", + "package_details": { + "file_path": null, + "name": "dpkg", + "package_manager": "OS", + "version": "1.21.1ubuntu2.3", + "release": null + }, + "remediation": { + "recommendation": { + "text": "None Provided" + } + }, + "cvss_v3_score": 8.2, + "cvss_v30_score": 0.0, + "cvss_v31_score": 8.2, + "cvss_v2_score": 0.0, + "cvss_v3_severity": "HIGH", + "source_url": "https://people.canonical.com/~ubuntu-security/cve/2025/CVE-2025-6297.html", + "source": "UBUNTU_CVE", + "severity": "HIGH", + "status": "ACTIVE", + "title": "CVE-2025-6297 - dpkg, libdpkg-perl", + "reason_to_ignore": "N/A" + } + ], + "libdpkg-perl": [ + { + "description": "It was discovered that dpkg-deb does not properly sanitize directory permissions when extracting a control member into a temporary directory, which is documented as being a safe operation even on untrusted data. This may result in leaving temporary files behind on cleanup. Given automated and repeated execution of dpkg-deb commands on adversarial .deb packages or with well compressible files, placed inside a directory with permissions not allowing removal by a non-root user, this can end up in a DoS scenario due to causing disk quota exhaustion or disk full conditions.", + "vulnerability_id": "CVE-2025-6297", + "name": "CVE-2025-6297", + "package_name": "libdpkg-perl", + "package_details": { + "file_path": null, + "name": "libdpkg-perl", + "package_manager": "OS", + "version": "1.21.1ubuntu2.3", + "release": null + }, + "remediation": { + "recommendation": { + "text": "None Provided" + } + }, + "cvss_v3_score": 8.2, + "cvss_v30_score": 0.0, + "cvss_v31_score": 8.2, + "cvss_v2_score": 0.0, + "cvss_v3_severity": "HIGH", + "source_url": "https://people.canonical.com/~ubuntu-security/cve/2025/CVE-2025-6297.html", + "source": "UBUNTU_CVE", + "severity": "HIGH", + "status": "ACTIVE", + "title": "CVE-2025-6297 - dpkg, libdpkg-perl", + "reason_to_ignore": "N/A" + } + ] +} \ No newline at end of file diff --git a/tensorflow/inference/docker/2.19/py3/cu122/Dockerfile.sagemaker.gpu.os_scan_allowlist.json b/tensorflow/inference/docker/2.19/py3/cu122/Dockerfile.sagemaker.gpu.os_scan_allowlist.json index 73a92fd91c64..5f9c8b9f3b42 100644 --- a/tensorflow/inference/docker/2.19/py3/cu122/Dockerfile.sagemaker.gpu.os_scan_allowlist.json +++ b/tensorflow/inference/docker/2.19/py3/cu122/Dockerfile.sagemaker.gpu.os_scan_allowlist.json @@ -475,5 +475,67 @@ "title": "CVE-2022-2068 - openssl", "reason_to_ignore": "N/A" } + ], + "dpkg": [ + { + "description": "It was discovered that dpkg-deb does not properly sanitize directory permissions when extracting a control member into a temporary directory, which is documented as being a safe operation even on untrusted data. This may result in leaving temporary files behind on cleanup. Given automated and repeated execution of dpkg-deb commands on adversarial .deb packages or with well compressible files, placed inside a directory with permissions not allowing removal by a non-root user, this can end up in a DoS scenario due to causing disk quota exhaustion or disk full conditions.", + "vulnerability_id": "CVE-2025-6297", + "name": "CVE-2025-6297", + "package_name": "dpkg", + "package_details": { + "file_path": null, + "name": "dpkg", + "package_manager": "OS", + "version": "1.21.1ubuntu2.3", + "release": null + }, + "remediation": { + "recommendation": { + "text": "None Provided" + } + }, + "cvss_v3_score": 8.2, + "cvss_v30_score": 0.0, + "cvss_v31_score": 8.2, + "cvss_v2_score": 0.0, + "cvss_v3_severity": "HIGH", + "source_url": "https://people.canonical.com/~ubuntu-security/cve/2025/CVE-2025-6297.html", + "source": "UBUNTU_CVE", + "severity": "HIGH", + "status": "ACTIVE", + "title": "CVE-2025-6297 - dpkg, libdpkg-perl", + "reason_to_ignore": "N/A" + } + ], + "libdpkg-perl": [ + { + "description": "It was discovered that dpkg-deb does not properly sanitize directory permissions when extracting a control member into a temporary directory, which is documented as being a safe operation even on untrusted data. This may result in leaving temporary files behind on cleanup. Given automated and repeated execution of dpkg-deb commands on adversarial .deb packages or with well compressible files, placed inside a directory with permissions not allowing removal by a non-root user, this can end up in a DoS scenario due to causing disk quota exhaustion or disk full conditions.", + "vulnerability_id": "CVE-2025-6297", + "name": "CVE-2025-6297", + "package_name": "libdpkg-perl", + "package_details": { + "file_path": null, + "name": "libdpkg-perl", + "package_manager": "OS", + "version": "1.21.1ubuntu2.3", + "release": null + }, + "remediation": { + "recommendation": { + "text": "None Provided" + } + }, + "cvss_v3_score": 8.2, + "cvss_v30_score": 0.0, + "cvss_v31_score": 8.2, + "cvss_v2_score": 0.0, + "cvss_v3_severity": "HIGH", + "source_url": "https://people.canonical.com/~ubuntu-security/cve/2025/CVE-2025-6297.html", + "source": "UBUNTU_CVE", + "severity": "HIGH", + "status": "ACTIVE", + "title": "CVE-2025-6297 - dpkg, libdpkg-perl", + "reason_to_ignore": "N/A" + } ] } diff --git a/tensorflow/training/docker/2.19/py3/Dockerfile.sagemaker.cpu.os_scan_allowlist.json b/tensorflow/training/docker/2.19/py3/Dockerfile.sagemaker.cpu.os_scan_allowlist.json index 540973552e6e..11ee152c09e9 100644 --- a/tensorflow/training/docker/2.19/py3/Dockerfile.sagemaker.cpu.os_scan_allowlist.json +++ b/tensorflow/training/docker/2.19/py3/Dockerfile.sagemaker.cpu.os_scan_allowlist.json @@ -211,5 +211,67 @@ "status": "ACTIVE", "reason_to_ignore": "N/A" } + ], + "dpkg": [ + { + "description": "It was discovered that dpkg-deb does not properly sanitize directory permissions when extracting a control member into a temporary directory, which is documented as being a safe operation even on untrusted data. This may result in leaving temporary files behind on cleanup. Given automated and repeated execution of dpkg-deb commands on adversarial .deb packages or with well compressible files, placed inside a directory with permissions not allowing removal by a non-root user, this can end up in a DoS scenario due to causing disk quota exhaustion or disk full conditions.", + "vulnerability_id": "CVE-2025-6297", + "name": "CVE-2025-6297", + "package_name": "dpkg", + "package_details": { + "file_path": null, + "name": "dpkg", + "package_manager": "OS", + "version": "1.21.1ubuntu2.3", + "release": null + }, + "remediation": { + "recommendation": { + "text": "None Provided" + } + }, + "cvss_v3_score": 8.2, + "cvss_v30_score": 0.0, + "cvss_v31_score": 8.2, + "cvss_v2_score": 0.0, + "cvss_v3_severity": "HIGH", + "source_url": "https://people.canonical.com/~ubuntu-security/cve/2025/CVE-2025-6297.html", + "source": "UBUNTU_CVE", + "severity": "HIGH", + "status": "ACTIVE", + "title": "CVE-2025-6297 - dpkg, libdpkg-perl", + "reason_to_ignore": "N/A" + } + ], + "libdpkg-perl": [ + { + "description": "It was discovered that dpkg-deb does not properly sanitize directory permissions when extracting a control member into a temporary directory, which is documented as being a safe operation even on untrusted data. This may result in leaving temporary files behind on cleanup. Given automated and repeated execution of dpkg-deb commands on adversarial .deb packages or with well compressible files, placed inside a directory with permissions not allowing removal by a non-root user, this can end up in a DoS scenario due to causing disk quota exhaustion or disk full conditions.", + "vulnerability_id": "CVE-2025-6297", + "name": "CVE-2025-6297", + "package_name": "libdpkg-perl", + "package_details": { + "file_path": null, + "name": "libdpkg-perl", + "package_manager": "OS", + "version": "1.21.1ubuntu2.3", + "release": null + }, + "remediation": { + "recommendation": { + "text": "None Provided" + } + }, + "cvss_v3_score": 8.2, + "cvss_v30_score": 0.0, + "cvss_v31_score": 8.2, + "cvss_v2_score": 0.0, + "cvss_v3_severity": "HIGH", + "source_url": "https://people.canonical.com/~ubuntu-security/cve/2025/CVE-2025-6297.html", + "source": "UBUNTU_CVE", + "severity": "HIGH", + "status": "ACTIVE", + "title": "CVE-2025-6297 - dpkg, libdpkg-perl", + "reason_to_ignore": "N/A" + } ] } diff --git a/tensorflow/training/docker/2.19/py3/cu125/Dockerfile.sagemaker.gpu.os_scan_allowlist.json b/tensorflow/training/docker/2.19/py3/cu125/Dockerfile.sagemaker.gpu.os_scan_allowlist.json index 540973552e6e..11ee152c09e9 100644 --- a/tensorflow/training/docker/2.19/py3/cu125/Dockerfile.sagemaker.gpu.os_scan_allowlist.json +++ b/tensorflow/training/docker/2.19/py3/cu125/Dockerfile.sagemaker.gpu.os_scan_allowlist.json @@ -211,5 +211,67 @@ "status": "ACTIVE", "reason_to_ignore": "N/A" } + ], + "dpkg": [ + { + "description": "It was discovered that dpkg-deb does not properly sanitize directory permissions when extracting a control member into a temporary directory, which is documented as being a safe operation even on untrusted data. This may result in leaving temporary files behind on cleanup. Given automated and repeated execution of dpkg-deb commands on adversarial .deb packages or with well compressible files, placed inside a directory with permissions not allowing removal by a non-root user, this can end up in a DoS scenario due to causing disk quota exhaustion or disk full conditions.", + "vulnerability_id": "CVE-2025-6297", + "name": "CVE-2025-6297", + "package_name": "dpkg", + "package_details": { + "file_path": null, + "name": "dpkg", + "package_manager": "OS", + "version": "1.21.1ubuntu2.3", + "release": null + }, + "remediation": { + "recommendation": { + "text": "None Provided" + } + }, + "cvss_v3_score": 8.2, + "cvss_v30_score": 0.0, + "cvss_v31_score": 8.2, + "cvss_v2_score": 0.0, + "cvss_v3_severity": "HIGH", + "source_url": "https://people.canonical.com/~ubuntu-security/cve/2025/CVE-2025-6297.html", + "source": "UBUNTU_CVE", + "severity": "HIGH", + "status": "ACTIVE", + "title": "CVE-2025-6297 - dpkg, libdpkg-perl", + "reason_to_ignore": "N/A" + } + ], + "libdpkg-perl": [ + { + "description": "It was discovered that dpkg-deb does not properly sanitize directory permissions when extracting a control member into a temporary directory, which is documented as being a safe operation even on untrusted data. This may result in leaving temporary files behind on cleanup. Given automated and repeated execution of dpkg-deb commands on adversarial .deb packages or with well compressible files, placed inside a directory with permissions not allowing removal by a non-root user, this can end up in a DoS scenario due to causing disk quota exhaustion or disk full conditions.", + "vulnerability_id": "CVE-2025-6297", + "name": "CVE-2025-6297", + "package_name": "libdpkg-perl", + "package_details": { + "file_path": null, + "name": "libdpkg-perl", + "package_manager": "OS", + "version": "1.21.1ubuntu2.3", + "release": null + }, + "remediation": { + "recommendation": { + "text": "None Provided" + } + }, + "cvss_v3_score": 8.2, + "cvss_v30_score": 0.0, + "cvss_v31_score": 8.2, + "cvss_v2_score": 0.0, + "cvss_v3_severity": "HIGH", + "source_url": "https://people.canonical.com/~ubuntu-security/cve/2025/CVE-2025-6297.html", + "source": "UBUNTU_CVE", + "severity": "HIGH", + "status": "ACTIVE", + "title": "CVE-2025-6297 - dpkg, libdpkg-perl", + "reason_to_ignore": "N/A" + } ] } From 83794931f8c78ecbafdb076f5a5683bd22e29dfe Mon Sep 17 00:00:00 2001 From: zhuofuAMZ <160550915+zhuofuAMZ@users.noreply.github.com> Date: Fri, 26 Sep 2025 15:37:10 -0700 Subject: [PATCH 40/49] Allowlisting dpkg as it is not patchable (#5308) --- ...rfile.ec2.arm64.cpu.os_scan_allowlist.json | 64 ++++++++++++++++++- .../Dockerfile.ec2.cpu.os_scan_allowlist.json | 62 ++++++++++++++++++ ...rfile.ec2.arm64.gpu.os_scan_allowlist.json | 62 ++++++++++++++++++ .../Dockerfile.ec2.gpu.os_scan_allowlist.json | 63 ++++++++++++++++++ ...rfile.sagemaker.gpu.os_scan_allowlist.json | 62 ++++++++++++++++++ ...rfile.ec2.arm64.cpu.os_scan_allowlist.json | 64 +++++++++++++++++++ .../Dockerfile.ec2.cpu.os_scan_allowlist.json | 64 +++++++++++++++++++ ...sagemaker.arm64.cpu.os_scan_allowlist.json | 64 +++++++++++++++++++ ...rfile.ec2.arm64.gpu.os_scan_allowlist.json | 64 +++++++++++++++++++ .../Dockerfile.ec2.gpu.os_scan_allowlist.json | 64 +++++++++++++++++++ ...rfile.sagemaker.gpu.os_scan_allowlist.json | 64 +++++++++++++++++++ ...rfile.sagemaker.cpu.os_scan_allowlist.json | 63 ++++++++++++++++++ ...rfile.sagemaker.gpu.os_scan_allowlist.json | 63 ++++++++++++++++++ 13 files changed, 822 insertions(+), 1 deletion(-) create mode 100644 pytorch/inference/docker/2.6/py3/Dockerfile.ec2.arm64.cpu.os_scan_allowlist.json create mode 100644 pytorch/inference/docker/2.6/py3/Dockerfile.ec2.cpu.os_scan_allowlist.json create mode 100644 pytorch/inference/docker/2.6/py3/Dockerfile.sagemaker.arm64.cpu.os_scan_allowlist.json create mode 100644 pytorch/inference/docker/2.6/py3/cu124/Dockerfile.ec2.arm64.gpu.os_scan_allowlist.json create mode 100644 pytorch/inference/docker/2.6/py3/cu124/Dockerfile.ec2.gpu.os_scan_allowlist.json create mode 100644 pytorch/inference/docker/2.6/py3/cu124/Dockerfile.sagemaker.gpu.os_scan_allowlist.json diff --git a/pytorch/inference/docker/2.5/py3/Dockerfile.ec2.arm64.cpu.os_scan_allowlist.json b/pytorch/inference/docker/2.5/py3/Dockerfile.ec2.arm64.cpu.os_scan_allowlist.json index 55cd3b721fff..10d8459bb72e 100644 --- a/pytorch/inference/docker/2.5/py3/Dockerfile.ec2.arm64.cpu.os_scan_allowlist.json +++ b/pytorch/inference/docker/2.5/py3/Dockerfile.ec2.arm64.cpu.os_scan_allowlist.json @@ -29,5 +29,67 @@ "title": "CVE-2025-32434 - torch", "reason_to_ignore": "N/A" } + ], + "dpkg": [ + { + "description": "It was discovered that dpkg-deb does not properly sanitize directory permissions when extracting a control member into a temporary directory, which is documented as being a safe operation even on untrusted data. This may result in leaving temporary files behind on cleanup. Given automated and repeated execution of dpkg-deb commands on adversarial .deb packages or with well compressible files, placed inside a directory with permissions not allowing removal by a non-root user, this can end up in a DoS scenario due to causing disk quota exhaustion or disk full conditions.", + "vulnerability_id": "CVE-2025-6297", + "name": "CVE-2025-6297", + "package_name": "dpkg", + "package_details": { + "file_path": null, + "name": "dpkg", + "package_manager": "OS", + "version": "1.21.1ubuntu2.3", + "release": null + }, + "remediation": { + "recommendation": { + "text": "None Provided" + } + }, + "cvss_v3_score": 8.2, + "cvss_v30_score": 0.0, + "cvss_v31_score": 8.2, + "cvss_v2_score": 0.0, + "cvss_v3_severity": "HIGH", + "source_url": "https://people.canonical.com/~ubuntu-security/cve/2025/CVE-2025-6297.html", + "source": "UBUNTU_CVE", + "severity": "HIGH", + "status": "ACTIVE", + "title": "CVE-2025-6297 - dpkg, libdpkg-perl", + "reason_to_ignore": "N/A" + } + ], + "libdpkg-perl": [ + { + "description": "It was discovered that dpkg-deb does not properly sanitize directory permissions when extracting a control member into a temporary directory, which is documented as being a safe operation even on untrusted data. This may result in leaving temporary files behind on cleanup. Given automated and repeated execution of dpkg-deb commands on adversarial .deb packages or with well compressible files, placed inside a directory with permissions not allowing removal by a non-root user, this can end up in a DoS scenario due to causing disk quota exhaustion or disk full conditions.", + "vulnerability_id": "CVE-2025-6297", + "name": "CVE-2025-6297", + "package_name": "libdpkg-perl", + "package_details": { + "file_path": null, + "name": "libdpkg-perl", + "package_manager": "OS", + "version": "1.21.1ubuntu2.3", + "release": null + }, + "remediation": { + "recommendation": { + "text": "None Provided" + } + }, + "cvss_v3_score": 8.2, + "cvss_v30_score": 0.0, + "cvss_v31_score": 8.2, + "cvss_v2_score": 0.0, + "cvss_v3_severity": "HIGH", + "source_url": "https://people.canonical.com/~ubuntu-security/cve/2025/CVE-2025-6297.html", + "source": "UBUNTU_CVE", + "severity": "HIGH", + "status": "ACTIVE", + "title": "CVE-2025-6297 - dpkg, libdpkg-perl", + "reason_to_ignore": "N/A" + } ] -} \ No newline at end of file +} diff --git a/pytorch/inference/docker/2.5/py3/Dockerfile.ec2.cpu.os_scan_allowlist.json b/pytorch/inference/docker/2.5/py3/Dockerfile.ec2.cpu.os_scan_allowlist.json index 783936bb4747..cd70d4a09e27 100644 --- a/pytorch/inference/docker/2.5/py3/Dockerfile.ec2.cpu.os_scan_allowlist.json +++ b/pytorch/inference/docker/2.5/py3/Dockerfile.ec2.cpu.os_scan_allowlist.json @@ -89,5 +89,67 @@ "title": "CVE-2025-32434 - torch", "reason_to_ignore": "N/A" } + ], + "dpkg": [ + { + "description": "It was discovered that dpkg-deb does not properly sanitize directory permissions when extracting a control member into a temporary directory, which is documented as being a safe operation even on untrusted data. This may result in leaving temporary files behind on cleanup. Given automated and repeated execution of dpkg-deb commands on adversarial .deb packages or with well compressible files, placed inside a directory with permissions not allowing removal by a non-root user, this can end up in a DoS scenario due to causing disk quota exhaustion or disk full conditions.", + "vulnerability_id": "CVE-2025-6297", + "name": "CVE-2025-6297", + "package_name": "dpkg", + "package_details": { + "file_path": null, + "name": "dpkg", + "package_manager": "OS", + "version": "1.21.1ubuntu2.3", + "release": null + }, + "remediation": { + "recommendation": { + "text": "None Provided" + } + }, + "cvss_v3_score": 8.2, + "cvss_v30_score": 0.0, + "cvss_v31_score": 8.2, + "cvss_v2_score": 0.0, + "cvss_v3_severity": "HIGH", + "source_url": "https://people.canonical.com/~ubuntu-security/cve/2025/CVE-2025-6297.html", + "source": "UBUNTU_CVE", + "severity": "HIGH", + "status": "ACTIVE", + "title": "CVE-2025-6297 - dpkg, libdpkg-perl", + "reason_to_ignore": "N/A" + } + ], + "libdpkg-perl": [ + { + "description": "It was discovered that dpkg-deb does not properly sanitize directory permissions when extracting a control member into a temporary directory, which is documented as being a safe operation even on untrusted data. This may result in leaving temporary files behind on cleanup. Given automated and repeated execution of dpkg-deb commands on adversarial .deb packages or with well compressible files, placed inside a directory with permissions not allowing removal by a non-root user, this can end up in a DoS scenario due to causing disk quota exhaustion or disk full conditions.", + "vulnerability_id": "CVE-2025-6297", + "name": "CVE-2025-6297", + "package_name": "libdpkg-perl", + "package_details": { + "file_path": null, + "name": "libdpkg-perl", + "package_manager": "OS", + "version": "1.21.1ubuntu2.3", + "release": null + }, + "remediation": { + "recommendation": { + "text": "None Provided" + } + }, + "cvss_v3_score": 8.2, + "cvss_v30_score": 0.0, + "cvss_v31_score": 8.2, + "cvss_v2_score": 0.0, + "cvss_v3_severity": "HIGH", + "source_url": "https://people.canonical.com/~ubuntu-security/cve/2025/CVE-2025-6297.html", + "source": "UBUNTU_CVE", + "severity": "HIGH", + "status": "ACTIVE", + "title": "CVE-2025-6297 - dpkg, libdpkg-perl", + "reason_to_ignore": "N/A" + } ] } diff --git a/pytorch/inference/docker/2.5/py3/cu124/Dockerfile.ec2.arm64.gpu.os_scan_allowlist.json b/pytorch/inference/docker/2.5/py3/cu124/Dockerfile.ec2.arm64.gpu.os_scan_allowlist.json index 77fa6e34a08d..10d8459bb72e 100644 --- a/pytorch/inference/docker/2.5/py3/cu124/Dockerfile.ec2.arm64.gpu.os_scan_allowlist.json +++ b/pytorch/inference/docker/2.5/py3/cu124/Dockerfile.ec2.arm64.gpu.os_scan_allowlist.json @@ -29,5 +29,67 @@ "title": "CVE-2025-32434 - torch", "reason_to_ignore": "N/A" } + ], + "dpkg": [ + { + "description": "It was discovered that dpkg-deb does not properly sanitize directory permissions when extracting a control member into a temporary directory, which is documented as being a safe operation even on untrusted data. This may result in leaving temporary files behind on cleanup. Given automated and repeated execution of dpkg-deb commands on adversarial .deb packages or with well compressible files, placed inside a directory with permissions not allowing removal by a non-root user, this can end up in a DoS scenario due to causing disk quota exhaustion or disk full conditions.", + "vulnerability_id": "CVE-2025-6297", + "name": "CVE-2025-6297", + "package_name": "dpkg", + "package_details": { + "file_path": null, + "name": "dpkg", + "package_manager": "OS", + "version": "1.21.1ubuntu2.3", + "release": null + }, + "remediation": { + "recommendation": { + "text": "None Provided" + } + }, + "cvss_v3_score": 8.2, + "cvss_v30_score": 0.0, + "cvss_v31_score": 8.2, + "cvss_v2_score": 0.0, + "cvss_v3_severity": "HIGH", + "source_url": "https://people.canonical.com/~ubuntu-security/cve/2025/CVE-2025-6297.html", + "source": "UBUNTU_CVE", + "severity": "HIGH", + "status": "ACTIVE", + "title": "CVE-2025-6297 - dpkg, libdpkg-perl", + "reason_to_ignore": "N/A" + } + ], + "libdpkg-perl": [ + { + "description": "It was discovered that dpkg-deb does not properly sanitize directory permissions when extracting a control member into a temporary directory, which is documented as being a safe operation even on untrusted data. This may result in leaving temporary files behind on cleanup. Given automated and repeated execution of dpkg-deb commands on adversarial .deb packages or with well compressible files, placed inside a directory with permissions not allowing removal by a non-root user, this can end up in a DoS scenario due to causing disk quota exhaustion or disk full conditions.", + "vulnerability_id": "CVE-2025-6297", + "name": "CVE-2025-6297", + "package_name": "libdpkg-perl", + "package_details": { + "file_path": null, + "name": "libdpkg-perl", + "package_manager": "OS", + "version": "1.21.1ubuntu2.3", + "release": null + }, + "remediation": { + "recommendation": { + "text": "None Provided" + } + }, + "cvss_v3_score": 8.2, + "cvss_v30_score": 0.0, + "cvss_v31_score": 8.2, + "cvss_v2_score": 0.0, + "cvss_v3_severity": "HIGH", + "source_url": "https://people.canonical.com/~ubuntu-security/cve/2025/CVE-2025-6297.html", + "source": "UBUNTU_CVE", + "severity": "HIGH", + "status": "ACTIVE", + "title": "CVE-2025-6297 - dpkg, libdpkg-perl", + "reason_to_ignore": "N/A" + } ] } diff --git a/pytorch/inference/docker/2.5/py3/cu124/Dockerfile.ec2.gpu.os_scan_allowlist.json b/pytorch/inference/docker/2.5/py3/cu124/Dockerfile.ec2.gpu.os_scan_allowlist.json index 783936bb4747..ea801c9046da 100644 --- a/pytorch/inference/docker/2.5/py3/cu124/Dockerfile.ec2.gpu.os_scan_allowlist.json +++ b/pytorch/inference/docker/2.5/py3/cu124/Dockerfile.ec2.gpu.os_scan_allowlist.json @@ -89,5 +89,68 @@ "title": "CVE-2025-32434 - torch", "reason_to_ignore": "N/A" } + ], + + "dpkg": [ + { + "description": "It was discovered that dpkg-deb does not properly sanitize directory permissions when extracting a control member into a temporary directory, which is documented as being a safe operation even on untrusted data. This may result in leaving temporary files behind on cleanup. Given automated and repeated execution of dpkg-deb commands on adversarial .deb packages or with well compressible files, placed inside a directory with permissions not allowing removal by a non-root user, this can end up in a DoS scenario due to causing disk quota exhaustion or disk full conditions.", + "vulnerability_id": "CVE-2025-6297", + "name": "CVE-2025-6297", + "package_name": "dpkg", + "package_details": { + "file_path": null, + "name": "dpkg", + "package_manager": "OS", + "version": "1.21.1ubuntu2.3", + "release": null + }, + "remediation": { + "recommendation": { + "text": "None Provided" + } + }, + "cvss_v3_score": 8.2, + "cvss_v30_score": 0.0, + "cvss_v31_score": 8.2, + "cvss_v2_score": 0.0, + "cvss_v3_severity": "HIGH", + "source_url": "https://people.canonical.com/~ubuntu-security/cve/2025/CVE-2025-6297.html", + "source": "UBUNTU_CVE", + "severity": "HIGH", + "status": "ACTIVE", + "title": "CVE-2025-6297 - dpkg, libdpkg-perl", + "reason_to_ignore": "N/A" + } + ], + "libdpkg-perl": [ + { + "description": "It was discovered that dpkg-deb does not properly sanitize directory permissions when extracting a control member into a temporary directory, which is documented as being a safe operation even on untrusted data. This may result in leaving temporary files behind on cleanup. Given automated and repeated execution of dpkg-deb commands on adversarial .deb packages or with well compressible files, placed inside a directory with permissions not allowing removal by a non-root user, this can end up in a DoS scenario due to causing disk quota exhaustion or disk full conditions.", + "vulnerability_id": "CVE-2025-6297", + "name": "CVE-2025-6297", + "package_name": "libdpkg-perl", + "package_details": { + "file_path": null, + "name": "libdpkg-perl", + "package_manager": "OS", + "version": "1.21.1ubuntu2.3", + "release": null + }, + "remediation": { + "recommendation": { + "text": "None Provided" + } + }, + "cvss_v3_score": 8.2, + "cvss_v30_score": 0.0, + "cvss_v31_score": 8.2, + "cvss_v2_score": 0.0, + "cvss_v3_severity": "HIGH", + "source_url": "https://people.canonical.com/~ubuntu-security/cve/2025/CVE-2025-6297.html", + "source": "UBUNTU_CVE", + "severity": "HIGH", + "status": "ACTIVE", + "title": "CVE-2025-6297 - dpkg, libdpkg-perl", + "reason_to_ignore": "N/A" + } ] } diff --git a/pytorch/inference/docker/2.5/py3/cu124/Dockerfile.sagemaker.gpu.os_scan_allowlist.json b/pytorch/inference/docker/2.5/py3/cu124/Dockerfile.sagemaker.gpu.os_scan_allowlist.json index 783936bb4747..cd70d4a09e27 100644 --- a/pytorch/inference/docker/2.5/py3/cu124/Dockerfile.sagemaker.gpu.os_scan_allowlist.json +++ b/pytorch/inference/docker/2.5/py3/cu124/Dockerfile.sagemaker.gpu.os_scan_allowlist.json @@ -89,5 +89,67 @@ "title": "CVE-2025-32434 - torch", "reason_to_ignore": "N/A" } + ], + "dpkg": [ + { + "description": "It was discovered that dpkg-deb does not properly sanitize directory permissions when extracting a control member into a temporary directory, which is documented as being a safe operation even on untrusted data. This may result in leaving temporary files behind on cleanup. Given automated and repeated execution of dpkg-deb commands on adversarial .deb packages or with well compressible files, placed inside a directory with permissions not allowing removal by a non-root user, this can end up in a DoS scenario due to causing disk quota exhaustion or disk full conditions.", + "vulnerability_id": "CVE-2025-6297", + "name": "CVE-2025-6297", + "package_name": "dpkg", + "package_details": { + "file_path": null, + "name": "dpkg", + "package_manager": "OS", + "version": "1.21.1ubuntu2.3", + "release": null + }, + "remediation": { + "recommendation": { + "text": "None Provided" + } + }, + "cvss_v3_score": 8.2, + "cvss_v30_score": 0.0, + "cvss_v31_score": 8.2, + "cvss_v2_score": 0.0, + "cvss_v3_severity": "HIGH", + "source_url": "https://people.canonical.com/~ubuntu-security/cve/2025/CVE-2025-6297.html", + "source": "UBUNTU_CVE", + "severity": "HIGH", + "status": "ACTIVE", + "title": "CVE-2025-6297 - dpkg, libdpkg-perl", + "reason_to_ignore": "N/A" + } + ], + "libdpkg-perl": [ + { + "description": "It was discovered that dpkg-deb does not properly sanitize directory permissions when extracting a control member into a temporary directory, which is documented as being a safe operation even on untrusted data. This may result in leaving temporary files behind on cleanup. Given automated and repeated execution of dpkg-deb commands on adversarial .deb packages or with well compressible files, placed inside a directory with permissions not allowing removal by a non-root user, this can end up in a DoS scenario due to causing disk quota exhaustion or disk full conditions.", + "vulnerability_id": "CVE-2025-6297", + "name": "CVE-2025-6297", + "package_name": "libdpkg-perl", + "package_details": { + "file_path": null, + "name": "libdpkg-perl", + "package_manager": "OS", + "version": "1.21.1ubuntu2.3", + "release": null + }, + "remediation": { + "recommendation": { + "text": "None Provided" + } + }, + "cvss_v3_score": 8.2, + "cvss_v30_score": 0.0, + "cvss_v31_score": 8.2, + "cvss_v2_score": 0.0, + "cvss_v3_severity": "HIGH", + "source_url": "https://people.canonical.com/~ubuntu-security/cve/2025/CVE-2025-6297.html", + "source": "UBUNTU_CVE", + "severity": "HIGH", + "status": "ACTIVE", + "title": "CVE-2025-6297 - dpkg, libdpkg-perl", + "reason_to_ignore": "N/A" + } ] } diff --git a/pytorch/inference/docker/2.6/py3/Dockerfile.ec2.arm64.cpu.os_scan_allowlist.json b/pytorch/inference/docker/2.6/py3/Dockerfile.ec2.arm64.cpu.os_scan_allowlist.json new file mode 100644 index 000000000000..769b5af66da8 --- /dev/null +++ b/pytorch/inference/docker/2.6/py3/Dockerfile.ec2.arm64.cpu.os_scan_allowlist.json @@ -0,0 +1,64 @@ +{ + "dpkg": [ + { + "description": "It was discovered that dpkg-deb does not properly sanitize directory permissions when extracting a control member into a temporary directory, which is documented as being a safe operation even on untrusted data. This may result in leaving temporary files behind on cleanup. Given automated and repeated execution of dpkg-deb commands on adversarial .deb packages or with well compressible files, placed inside a directory with permissions not allowing removal by a non-root user, this can end up in a DoS scenario due to causing disk quota exhaustion or disk full conditions.", + "vulnerability_id": "CVE-2025-6297", + "name": "CVE-2025-6297", + "package_name": "dpkg", + "package_details": { + "file_path": null, + "name": "dpkg", + "package_manager": "OS", + "version": "1.21.1ubuntu2.3", + "release": null + }, + "remediation": { + "recommendation": { + "text": "None Provided" + } + }, + "cvss_v3_score": 8.2, + "cvss_v30_score": 0.0, + "cvss_v31_score": 8.2, + "cvss_v2_score": 0.0, + "cvss_v3_severity": "HIGH", + "source_url": "https://people.canonical.com/~ubuntu-security/cve/2025/CVE-2025-6297.html", + "source": "UBUNTU_CVE", + "severity": "HIGH", + "status": "ACTIVE", + "title": "CVE-2025-6297 - dpkg, libdpkg-perl", + "reason_to_ignore": "N/A" + } + ], + "libdpkg-perl": [ + { + "description": "It was discovered that dpkg-deb does not properly sanitize directory permissions when extracting a control member into a temporary directory, which is documented as being a safe operation even on untrusted data. This may result in leaving temporary files behind on cleanup. Given automated and repeated execution of dpkg-deb commands on adversarial .deb packages or with well compressible files, placed inside a directory with permissions not allowing removal by a non-root user, this can end up in a DoS scenario due to causing disk quota exhaustion or disk full conditions.", + "vulnerability_id": "CVE-2025-6297", + "name": "CVE-2025-6297", + "package_name": "libdpkg-perl", + "package_details": { + "file_path": null, + "name": "libdpkg-perl", + "package_manager": "OS", + "version": "1.21.1ubuntu2.3", + "release": null + }, + "remediation": { + "recommendation": { + "text": "None Provided" + } + }, + "cvss_v3_score": 8.2, + "cvss_v30_score": 0.0, + "cvss_v31_score": 8.2, + "cvss_v2_score": 0.0, + "cvss_v3_severity": "HIGH", + "source_url": "https://people.canonical.com/~ubuntu-security/cve/2025/CVE-2025-6297.html", + "source": "UBUNTU_CVE", + "severity": "HIGH", + "status": "ACTIVE", + "title": "CVE-2025-6297 - dpkg, libdpkg-perl", + "reason_to_ignore": "N/A" + } + ] +} diff --git a/pytorch/inference/docker/2.6/py3/Dockerfile.ec2.cpu.os_scan_allowlist.json b/pytorch/inference/docker/2.6/py3/Dockerfile.ec2.cpu.os_scan_allowlist.json new file mode 100644 index 000000000000..769b5af66da8 --- /dev/null +++ b/pytorch/inference/docker/2.6/py3/Dockerfile.ec2.cpu.os_scan_allowlist.json @@ -0,0 +1,64 @@ +{ + "dpkg": [ + { + "description": "It was discovered that dpkg-deb does not properly sanitize directory permissions when extracting a control member into a temporary directory, which is documented as being a safe operation even on untrusted data. This may result in leaving temporary files behind on cleanup. Given automated and repeated execution of dpkg-deb commands on adversarial .deb packages or with well compressible files, placed inside a directory with permissions not allowing removal by a non-root user, this can end up in a DoS scenario due to causing disk quota exhaustion or disk full conditions.", + "vulnerability_id": "CVE-2025-6297", + "name": "CVE-2025-6297", + "package_name": "dpkg", + "package_details": { + "file_path": null, + "name": "dpkg", + "package_manager": "OS", + "version": "1.21.1ubuntu2.3", + "release": null + }, + "remediation": { + "recommendation": { + "text": "None Provided" + } + }, + "cvss_v3_score": 8.2, + "cvss_v30_score": 0.0, + "cvss_v31_score": 8.2, + "cvss_v2_score": 0.0, + "cvss_v3_severity": "HIGH", + "source_url": "https://people.canonical.com/~ubuntu-security/cve/2025/CVE-2025-6297.html", + "source": "UBUNTU_CVE", + "severity": "HIGH", + "status": "ACTIVE", + "title": "CVE-2025-6297 - dpkg, libdpkg-perl", + "reason_to_ignore": "N/A" + } + ], + "libdpkg-perl": [ + { + "description": "It was discovered that dpkg-deb does not properly sanitize directory permissions when extracting a control member into a temporary directory, which is documented as being a safe operation even on untrusted data. This may result in leaving temporary files behind on cleanup. Given automated and repeated execution of dpkg-deb commands on adversarial .deb packages or with well compressible files, placed inside a directory with permissions not allowing removal by a non-root user, this can end up in a DoS scenario due to causing disk quota exhaustion or disk full conditions.", + "vulnerability_id": "CVE-2025-6297", + "name": "CVE-2025-6297", + "package_name": "libdpkg-perl", + "package_details": { + "file_path": null, + "name": "libdpkg-perl", + "package_manager": "OS", + "version": "1.21.1ubuntu2.3", + "release": null + }, + "remediation": { + "recommendation": { + "text": "None Provided" + } + }, + "cvss_v3_score": 8.2, + "cvss_v30_score": 0.0, + "cvss_v31_score": 8.2, + "cvss_v2_score": 0.0, + "cvss_v3_severity": "HIGH", + "source_url": "https://people.canonical.com/~ubuntu-security/cve/2025/CVE-2025-6297.html", + "source": "UBUNTU_CVE", + "severity": "HIGH", + "status": "ACTIVE", + "title": "CVE-2025-6297 - dpkg, libdpkg-perl", + "reason_to_ignore": "N/A" + } + ] +} diff --git a/pytorch/inference/docker/2.6/py3/Dockerfile.sagemaker.arm64.cpu.os_scan_allowlist.json b/pytorch/inference/docker/2.6/py3/Dockerfile.sagemaker.arm64.cpu.os_scan_allowlist.json new file mode 100644 index 000000000000..c803967b40fa --- /dev/null +++ b/pytorch/inference/docker/2.6/py3/Dockerfile.sagemaker.arm64.cpu.os_scan_allowlist.json @@ -0,0 +1,64 @@ +{ + "dpkg": [ + { + "description": "It was discovered that dpkg-deb does not properly sanitize directory permissions when extracting a control member into a temporary directory, which is documented as being a safe operation even on untrusted data. This may result in leaving temporary files behind on cleanup. Given automated and repeated execution of dpkg-deb commands on adversarial .deb packages or with well compressible files, placed inside a directory with permissions not allowing removal by a non-root user, this can end up in a DoS scenario due to causing disk quota exhaustion or disk full conditions.", + "vulnerability_id": "CVE-2025-6297", + "name": "CVE-2025-6297", + "package_name": "dpkg", + "package_details": { + "file_path": null, + "name": "dpkg", + "package_manager": "OS", + "version": "1.21.1ubuntu2.3", + "release": null + }, + "remediation": { + "recommendation": { + "text": "None Provided" + } + }, + "cvss_v3_score": 8.2, + "cvss_v30_score": 0.0, + "cvss_v31_score": 8.2, + "cvss_v2_score": 0.0, + "cvss_v3_severity": "HIGH", + "source_url": "https://people.canonical.com/~ubuntu-security/cve/2025/CVE-2025-6297.html", + "source": "UBUNTU_CVE", + "severity": "HIGH", + "status": "ACTIVE", + "title": "CVE-2025-6297 - dpkg, libdpkg-perl", + "reason_to_ignore": "N/A" + } + ], + "libdpkg-perl": [ + { + "description": "It was discovered that dpkg-deb does not properly sanitize directory permissions when extracting a control member into a temporary directory, which is documented as being a safe operation even on untrusted data. This may result in leaving temporary files behind on cleanup. Given automated and repeated execution of dpkg-deb commands on adversarial .deb packages or with well compressible files, placed inside a directory with permissions not allowing removal by a non-root user, this can end up in a DoS scenario due to causing disk quota exhaustion or disk full conditions.", + "vulnerability_id": "CVE-2025-6297", + "name": "CVE-2025-6297", + "package_name": "libdpkg-perl", + "package_details": { + "file_path": null, + "name": "libdpkg-perl", + "package_manager": "OS", + "version": "1.21.1ubuntu2.3", + "release": null + }, + "remediation": { + "recommendation": { + "text": "None Provided" + } + }, + "cvss_v3_score": 8.2, + "cvss_v30_score": 0.0, + "cvss_v31_score": 8.2, + "cvss_v2_score": 0.0, + "cvss_v3_severity": "HIGH", + "source_url": "https://people.canonical.com/~ubuntu-security/cve/2025/CVE-2025-6297.html", + "source": "UBUNTU_CVE", + "severity": "HIGH", + "status": "ACTIVE", + "title": "CVE-2025-6297 - dpkg, libdpkg-perl", + "reason_to_ignore": "N/A" + } + ] +} diff --git a/pytorch/inference/docker/2.6/py3/cu124/Dockerfile.ec2.arm64.gpu.os_scan_allowlist.json b/pytorch/inference/docker/2.6/py3/cu124/Dockerfile.ec2.arm64.gpu.os_scan_allowlist.json new file mode 100644 index 000000000000..769b5af66da8 --- /dev/null +++ b/pytorch/inference/docker/2.6/py3/cu124/Dockerfile.ec2.arm64.gpu.os_scan_allowlist.json @@ -0,0 +1,64 @@ +{ + "dpkg": [ + { + "description": "It was discovered that dpkg-deb does not properly sanitize directory permissions when extracting a control member into a temporary directory, which is documented as being a safe operation even on untrusted data. This may result in leaving temporary files behind on cleanup. Given automated and repeated execution of dpkg-deb commands on adversarial .deb packages or with well compressible files, placed inside a directory with permissions not allowing removal by a non-root user, this can end up in a DoS scenario due to causing disk quota exhaustion or disk full conditions.", + "vulnerability_id": "CVE-2025-6297", + "name": "CVE-2025-6297", + "package_name": "dpkg", + "package_details": { + "file_path": null, + "name": "dpkg", + "package_manager": "OS", + "version": "1.21.1ubuntu2.3", + "release": null + }, + "remediation": { + "recommendation": { + "text": "None Provided" + } + }, + "cvss_v3_score": 8.2, + "cvss_v30_score": 0.0, + "cvss_v31_score": 8.2, + "cvss_v2_score": 0.0, + "cvss_v3_severity": "HIGH", + "source_url": "https://people.canonical.com/~ubuntu-security/cve/2025/CVE-2025-6297.html", + "source": "UBUNTU_CVE", + "severity": "HIGH", + "status": "ACTIVE", + "title": "CVE-2025-6297 - dpkg, libdpkg-perl", + "reason_to_ignore": "N/A" + } + ], + "libdpkg-perl": [ + { + "description": "It was discovered that dpkg-deb does not properly sanitize directory permissions when extracting a control member into a temporary directory, which is documented as being a safe operation even on untrusted data. This may result in leaving temporary files behind on cleanup. Given automated and repeated execution of dpkg-deb commands on adversarial .deb packages or with well compressible files, placed inside a directory with permissions not allowing removal by a non-root user, this can end up in a DoS scenario due to causing disk quota exhaustion or disk full conditions.", + "vulnerability_id": "CVE-2025-6297", + "name": "CVE-2025-6297", + "package_name": "libdpkg-perl", + "package_details": { + "file_path": null, + "name": "libdpkg-perl", + "package_manager": "OS", + "version": "1.21.1ubuntu2.3", + "release": null + }, + "remediation": { + "recommendation": { + "text": "None Provided" + } + }, + "cvss_v3_score": 8.2, + "cvss_v30_score": 0.0, + "cvss_v31_score": 8.2, + "cvss_v2_score": 0.0, + "cvss_v3_severity": "HIGH", + "source_url": "https://people.canonical.com/~ubuntu-security/cve/2025/CVE-2025-6297.html", + "source": "UBUNTU_CVE", + "severity": "HIGH", + "status": "ACTIVE", + "title": "CVE-2025-6297 - dpkg, libdpkg-perl", + "reason_to_ignore": "N/A" + } + ] +} diff --git a/pytorch/inference/docker/2.6/py3/cu124/Dockerfile.ec2.gpu.os_scan_allowlist.json b/pytorch/inference/docker/2.6/py3/cu124/Dockerfile.ec2.gpu.os_scan_allowlist.json new file mode 100644 index 000000000000..769b5af66da8 --- /dev/null +++ b/pytorch/inference/docker/2.6/py3/cu124/Dockerfile.ec2.gpu.os_scan_allowlist.json @@ -0,0 +1,64 @@ +{ + "dpkg": [ + { + "description": "It was discovered that dpkg-deb does not properly sanitize directory permissions when extracting a control member into a temporary directory, which is documented as being a safe operation even on untrusted data. This may result in leaving temporary files behind on cleanup. Given automated and repeated execution of dpkg-deb commands on adversarial .deb packages or with well compressible files, placed inside a directory with permissions not allowing removal by a non-root user, this can end up in a DoS scenario due to causing disk quota exhaustion or disk full conditions.", + "vulnerability_id": "CVE-2025-6297", + "name": "CVE-2025-6297", + "package_name": "dpkg", + "package_details": { + "file_path": null, + "name": "dpkg", + "package_manager": "OS", + "version": "1.21.1ubuntu2.3", + "release": null + }, + "remediation": { + "recommendation": { + "text": "None Provided" + } + }, + "cvss_v3_score": 8.2, + "cvss_v30_score": 0.0, + "cvss_v31_score": 8.2, + "cvss_v2_score": 0.0, + "cvss_v3_severity": "HIGH", + "source_url": "https://people.canonical.com/~ubuntu-security/cve/2025/CVE-2025-6297.html", + "source": "UBUNTU_CVE", + "severity": "HIGH", + "status": "ACTIVE", + "title": "CVE-2025-6297 - dpkg, libdpkg-perl", + "reason_to_ignore": "N/A" + } + ], + "libdpkg-perl": [ + { + "description": "It was discovered that dpkg-deb does not properly sanitize directory permissions when extracting a control member into a temporary directory, which is documented as being a safe operation even on untrusted data. This may result in leaving temporary files behind on cleanup. Given automated and repeated execution of dpkg-deb commands on adversarial .deb packages or with well compressible files, placed inside a directory with permissions not allowing removal by a non-root user, this can end up in a DoS scenario due to causing disk quota exhaustion or disk full conditions.", + "vulnerability_id": "CVE-2025-6297", + "name": "CVE-2025-6297", + "package_name": "libdpkg-perl", + "package_details": { + "file_path": null, + "name": "libdpkg-perl", + "package_manager": "OS", + "version": "1.21.1ubuntu2.3", + "release": null + }, + "remediation": { + "recommendation": { + "text": "None Provided" + } + }, + "cvss_v3_score": 8.2, + "cvss_v30_score": 0.0, + "cvss_v31_score": 8.2, + "cvss_v2_score": 0.0, + "cvss_v3_severity": "HIGH", + "source_url": "https://people.canonical.com/~ubuntu-security/cve/2025/CVE-2025-6297.html", + "source": "UBUNTU_CVE", + "severity": "HIGH", + "status": "ACTIVE", + "title": "CVE-2025-6297 - dpkg, libdpkg-perl", + "reason_to_ignore": "N/A" + } + ] +} diff --git a/pytorch/inference/docker/2.6/py3/cu124/Dockerfile.sagemaker.gpu.os_scan_allowlist.json b/pytorch/inference/docker/2.6/py3/cu124/Dockerfile.sagemaker.gpu.os_scan_allowlist.json new file mode 100644 index 000000000000..769b5af66da8 --- /dev/null +++ b/pytorch/inference/docker/2.6/py3/cu124/Dockerfile.sagemaker.gpu.os_scan_allowlist.json @@ -0,0 +1,64 @@ +{ + "dpkg": [ + { + "description": "It was discovered that dpkg-deb does not properly sanitize directory permissions when extracting a control member into a temporary directory, which is documented as being a safe operation even on untrusted data. This may result in leaving temporary files behind on cleanup. Given automated and repeated execution of dpkg-deb commands on adversarial .deb packages or with well compressible files, placed inside a directory with permissions not allowing removal by a non-root user, this can end up in a DoS scenario due to causing disk quota exhaustion or disk full conditions.", + "vulnerability_id": "CVE-2025-6297", + "name": "CVE-2025-6297", + "package_name": "dpkg", + "package_details": { + "file_path": null, + "name": "dpkg", + "package_manager": "OS", + "version": "1.21.1ubuntu2.3", + "release": null + }, + "remediation": { + "recommendation": { + "text": "None Provided" + } + }, + "cvss_v3_score": 8.2, + "cvss_v30_score": 0.0, + "cvss_v31_score": 8.2, + "cvss_v2_score": 0.0, + "cvss_v3_severity": "HIGH", + "source_url": "https://people.canonical.com/~ubuntu-security/cve/2025/CVE-2025-6297.html", + "source": "UBUNTU_CVE", + "severity": "HIGH", + "status": "ACTIVE", + "title": "CVE-2025-6297 - dpkg, libdpkg-perl", + "reason_to_ignore": "N/A" + } + ], + "libdpkg-perl": [ + { + "description": "It was discovered that dpkg-deb does not properly sanitize directory permissions when extracting a control member into a temporary directory, which is documented as being a safe operation even on untrusted data. This may result in leaving temporary files behind on cleanup. Given automated and repeated execution of dpkg-deb commands on adversarial .deb packages or with well compressible files, placed inside a directory with permissions not allowing removal by a non-root user, this can end up in a DoS scenario due to causing disk quota exhaustion or disk full conditions.", + "vulnerability_id": "CVE-2025-6297", + "name": "CVE-2025-6297", + "package_name": "libdpkg-perl", + "package_details": { + "file_path": null, + "name": "libdpkg-perl", + "package_manager": "OS", + "version": "1.21.1ubuntu2.3", + "release": null + }, + "remediation": { + "recommendation": { + "text": "None Provided" + } + }, + "cvss_v3_score": 8.2, + "cvss_v30_score": 0.0, + "cvss_v31_score": 8.2, + "cvss_v2_score": 0.0, + "cvss_v3_severity": "HIGH", + "source_url": "https://people.canonical.com/~ubuntu-security/cve/2025/CVE-2025-6297.html", + "source": "UBUNTU_CVE", + "severity": "HIGH", + "status": "ACTIVE", + "title": "CVE-2025-6297 - dpkg, libdpkg-perl", + "reason_to_ignore": "N/A" + } + ] +} diff --git a/pytorch/training/docker/2.5/py3/Dockerfile.sagemaker.cpu.os_scan_allowlist.json b/pytorch/training/docker/2.5/py3/Dockerfile.sagemaker.cpu.os_scan_allowlist.json index 446900b70600..6d0cf914e7f5 100644 --- a/pytorch/training/docker/2.5/py3/Dockerfile.sagemaker.cpu.os_scan_allowlist.json +++ b/pytorch/training/docker/2.5/py3/Dockerfile.sagemaker.cpu.os_scan_allowlist.json @@ -441,5 +441,68 @@ "title": "CVE-2025-30167 - jupyter_core", "reason_to_ignore": "N/A" } + ], + + "dpkg": [ + { + "description": "It was discovered that dpkg-deb does not properly sanitize directory permissions when extracting a control member into a temporary directory, which is documented as being a safe operation even on untrusted data. This may result in leaving temporary files behind on cleanup. Given automated and repeated execution of dpkg-deb commands on adversarial .deb packages or with well compressible files, placed inside a directory with permissions not allowing removal by a non-root user, this can end up in a DoS scenario due to causing disk quota exhaustion or disk full conditions.", + "vulnerability_id": "CVE-2025-6297", + "name": "CVE-2025-6297", + "package_name": "dpkg", + "package_details": { + "file_path": null, + "name": "dpkg", + "package_manager": "OS", + "version": "1.21.1ubuntu2.3", + "release": null + }, + "remediation": { + "recommendation": { + "text": "None Provided" + } + }, + "cvss_v3_score": 8.2, + "cvss_v30_score": 0.0, + "cvss_v31_score": 8.2, + "cvss_v2_score": 0.0, + "cvss_v3_severity": "HIGH", + "source_url": "https://people.canonical.com/~ubuntu-security/cve/2025/CVE-2025-6297.html", + "source": "UBUNTU_CVE", + "severity": "HIGH", + "status": "ACTIVE", + "title": "CVE-2025-6297 - dpkg, libdpkg-perl", + "reason_to_ignore": "N/A" + } + ], + "libdpkg-perl": [ + { + "description": "It was discovered that dpkg-deb does not properly sanitize directory permissions when extracting a control member into a temporary directory, which is documented as being a safe operation even on untrusted data. This may result in leaving temporary files behind on cleanup. Given automated and repeated execution of dpkg-deb commands on adversarial .deb packages or with well compressible files, placed inside a directory with permissions not allowing removal by a non-root user, this can end up in a DoS scenario due to causing disk quota exhaustion or disk full conditions.", + "vulnerability_id": "CVE-2025-6297", + "name": "CVE-2025-6297", + "package_name": "libdpkg-perl", + "package_details": { + "file_path": null, + "name": "libdpkg-perl", + "package_manager": "OS", + "version": "1.21.1ubuntu2.3", + "release": null + }, + "remediation": { + "recommendation": { + "text": "None Provided" + } + }, + "cvss_v3_score": 8.2, + "cvss_v30_score": 0.0, + "cvss_v31_score": 8.2, + "cvss_v2_score": 0.0, + "cvss_v3_severity": "HIGH", + "source_url": "https://people.canonical.com/~ubuntu-security/cve/2025/CVE-2025-6297.html", + "source": "UBUNTU_CVE", + "severity": "HIGH", + "status": "ACTIVE", + "title": "CVE-2025-6297 - dpkg, libdpkg-perl", + "reason_to_ignore": "N/A" + } ] } diff --git a/pytorch/training/docker/2.5/py3/cu124/Dockerfile.sagemaker.gpu.os_scan_allowlist.json b/pytorch/training/docker/2.5/py3/cu124/Dockerfile.sagemaker.gpu.os_scan_allowlist.json index 9a2b585cf840..dd1742ef687b 100644 --- a/pytorch/training/docker/2.5/py3/cu124/Dockerfile.sagemaker.gpu.os_scan_allowlist.json +++ b/pytorch/training/docker/2.5/py3/cu124/Dockerfile.sagemaker.gpu.os_scan_allowlist.json @@ -1290,5 +1290,68 @@ "title": "CVE-2025-30167 - jupyter_core", "reason_to_ignore": "N/A" } + ], + + "dpkg": [ + { + "description": "It was discovered that dpkg-deb does not properly sanitize directory permissions when extracting a control member into a temporary directory, which is documented as being a safe operation even on untrusted data. This may result in leaving temporary files behind on cleanup. Given automated and repeated execution of dpkg-deb commands on adversarial .deb packages or with well compressible files, placed inside a directory with permissions not allowing removal by a non-root user, this can end up in a DoS scenario due to causing disk quota exhaustion or disk full conditions.", + "vulnerability_id": "CVE-2025-6297", + "name": "CVE-2025-6297", + "package_name": "dpkg", + "package_details": { + "file_path": null, + "name": "dpkg", + "package_manager": "OS", + "version": "1.21.1ubuntu2.3", + "release": null + }, + "remediation": { + "recommendation": { + "text": "None Provided" + } + }, + "cvss_v3_score": 8.2, + "cvss_v30_score": 0.0, + "cvss_v31_score": 8.2, + "cvss_v2_score": 0.0, + "cvss_v3_severity": "HIGH", + "source_url": "https://people.canonical.com/~ubuntu-security/cve/2025/CVE-2025-6297.html", + "source": "UBUNTU_CVE", + "severity": "HIGH", + "status": "ACTIVE", + "title": "CVE-2025-6297 - dpkg, libdpkg-perl", + "reason_to_ignore": "N/A" + } + ], + "libdpkg-perl": [ + { + "description": "It was discovered that dpkg-deb does not properly sanitize directory permissions when extracting a control member into a temporary directory, which is documented as being a safe operation even on untrusted data. This may result in leaving temporary files behind on cleanup. Given automated and repeated execution of dpkg-deb commands on adversarial .deb packages or with well compressible files, placed inside a directory with permissions not allowing removal by a non-root user, this can end up in a DoS scenario due to causing disk quota exhaustion or disk full conditions.", + "vulnerability_id": "CVE-2025-6297", + "name": "CVE-2025-6297", + "package_name": "libdpkg-perl", + "package_details": { + "file_path": null, + "name": "libdpkg-perl", + "package_manager": "OS", + "version": "1.21.1ubuntu2.3", + "release": null + }, + "remediation": { + "recommendation": { + "text": "None Provided" + } + }, + "cvss_v3_score": 8.2, + "cvss_v30_score": 0.0, + "cvss_v31_score": 8.2, + "cvss_v2_score": 0.0, + "cvss_v3_severity": "HIGH", + "source_url": "https://people.canonical.com/~ubuntu-security/cve/2025/CVE-2025-6297.html", + "source": "UBUNTU_CVE", + "severity": "HIGH", + "status": "ACTIVE", + "title": "CVE-2025-6297 - dpkg, libdpkg-perl", + "reason_to_ignore": "N/A" + } ] } From bfcde1a54914aee307cc3ae5c0ee5325d01f796e Mon Sep 17 00:00:00 2001 From: Jyothirmai Kottu Date: Fri, 26 Sep 2025 16:18:19 -0700 Subject: [PATCH 41/49] add more cves (#5309) --- ...rfile.ec2.arm64.cpu.os_scan_allowlist.json | 377 ++++++++++-------- ...sagemaker.arm64.cpu.os_scan_allowlist.json | 375 +++++++++-------- 2 files changed, 427 insertions(+), 325 deletions(-) diff --git a/tensorflow/inference/docker/2.18/py3/Dockerfile.ec2.arm64.cpu.os_scan_allowlist.json b/tensorflow/inference/docker/2.18/py3/Dockerfile.ec2.arm64.cpu.os_scan_allowlist.json index 83559f188a63..92c19e1a1f3b 100644 --- a/tensorflow/inference/docker/2.18/py3/Dockerfile.ec2.arm64.cpu.os_scan_allowlist.json +++ b/tensorflow/inference/docker/2.18/py3/Dockerfile.ec2.arm64.cpu.os_scan_allowlist.json @@ -382,178 +382,229 @@ ], "linux-libc-dev": [ { - "description": "In the Linux kernel, the following vulnerability has been resolved: memstick: rtsx_usb_ms: Fix slab-use-after-free in rtsx_usb_ms_drv_remove This fixes the following crash: ================================================================== BUG: KASAN: slab-use-after-free in rtsx_usb_ms_poll_card+0x159/0x200 [rtsx_usb_ms] Read of size 8 at addr ffff888136335380 by task *******/6:0/140241 CPU: 6 UID: 0 PID: 140241 Comm: *******/6:0 Kdump: loaded Tainted: G E 6.14.0-rc6+ #1 Tainted: [E]=UNSIGNED_MODULE Hardware name: LENOVO 30FNA1V7CW/1057, BIOS S0EKT54A 07/01/2024 Workqueue: events rtsx_usb_ms_poll_card [rtsx_usb_ms] Call Trace: dump_stack_lvl+0x51/0x70 print_address_description.constprop.0+0x27/0x320 ? rtsx_usb_ms_poll_card+0x159/0x200 [rtsx_usb_ms] print_report+0x3e/0x70 kasan_report+0xab/0xe0 ? rtsx_usb_ms_poll_card+0x159/0x200 [rtsx_usb_ms] rtsx_usb_ms_poll_card+0x159/0x200 [rtsx_usb_ms] ? __pfx_rtsx_usb_ms_poll_card+0x10/0x10 [rtsx_usb_ms] ? __pfx___schedule+0x10/0x10 ? kick_pool+0x3b/0x270 process_", - "vulnerability_id": "CVE-2025-22020", - "name": "CVE-2025-22020", - "package_name": "linux-libc-dev", - "package_details": { - "file_path": null, - "name": "linux-libc-dev", - "package_manager": "OS", - "version": "5.4.0", - "release": "216.236" - }, - "remediation": { - "recommendation": { - "text": "None Provided" - } - }, - "cvss_v3_score": 7.8, - "cvss_v30_score": 0.0, - "cvss_v31_score": 7.8, - "cvss_v2_score": 0.0, - "cvss_v3_severity": "HIGH", - "source_url": "https://people.canonical.com/~ubuntu-security/cve/2025/CVE-2025-22020.html", - "source": "UBUNTU_CVE", - "severity": "HIGH", - "status": "ACTIVE", - "title": "CVE-2025-22020 - linux-libc-dev", - "reason_to_ignore": "N/A" + "description": "In the Linux kernel, the following vulnerability has been resolved: sunrpc: fix one UAF issue caused by sunrpc kernel tcp socket BUG: KASAN: slab-use-after-free in tcp_write_timer_handler+0x156/0x3e0 Read of size 1 at addr ffff888111f322cd by task swapper/0/0 CPU: 0 UID: 0 PID: 0 Comm: swapper/0 Not tainted 6.12.0-rc4-dirty #7 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.15.0-1 Call Trace: dump_stack_lvl+0x68/0xa0 print_address_description.constprop.0+0x2c/0x3d0 print_report+0xb4/0x270 kasan_report+0xbd/0xf0 tcp_write_timer_handler+0x156/0x3e0 tcp_write_timer+0x66/0x170 call_timer_fn+0xfb/0x1d0 __run_timers+0x3f8/0x480 run_timer_softirq+0x9b/0x100 handle_softirqs+0x153/0x390 __irq_exit_rcu+0x103/0x120 irq_exit_rcu+0xe/0x20 sysvec_apic_timer_interrupt+0x76/0x90 asm_sysvec_apic_timer_interrupt+0x1a/0x20 RIP: 0010:default_idle+0xf/0x20 Code: 4c 01 c7 4c 29 c2 e9 72 ff ff ff 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 f3 0f 1e fa 66 90 0f 00 2d 33 f8 25 00 fb f4 c3 cc", + "vulnerability_id": "CVE-2024-53168", + "name": "CVE-2024-53168", + "package_name": "linux-libc-dev", + "package_details": { + "file_path": null, + "name": "linux-libc-dev", + "package_manager": "OS", + "version": "5.4.0", + "release": "216.236" + }, + "remediation": {"recommendation": {"text": "None Provided"}}, + "cvss_v3_score": 7.8, + "cvss_v30_score": 0.0, + "cvss_v31_score": 7.8, + "cvss_v2_score": 0.0, + "cvss_v3_severity": "HIGH", + "source_url": "https://people.canonical.com/~ubuntu-security/cve/2024/CVE-2024-53168.html", + "source": "UBUNTU_CVE", + "severity": "HIGH", + "status": "ACTIVE", + "title": "CVE-2024-53168 - linux-libc-dev", + "reason_to_ignore": "N/A" }, { - "description": "In the Linux kernel, the following vulnerability has been resolved: drm/amdgpu: fix usage slab after free [ +0.000021] BUG: KASAN: slab-use-after-free in drm_sched_entity_flush+0x6cb/0x7a0 [gpu_sched] [ +0.000027] Read of size 8 at addr ffff8881b8605f88 by task amd_pci_unplug/2147 [ +0.000023] CPU: 6 PID: 2147 Comm: amd_pci_unplug Not tainted 6.10.0+ #1 [ +0.000016] Hardware name: ASUS System Product Name/ROG STRIX B550-F GAMING (WI-FI), BIOS 1401 12/03/2020 [ +0.000016] Call Trace: [ +0.000008] [ +0.000009] dump_stack_lvl+0x76/0xa0 [ +0.000017] print_report+0xce/0x5f0 [ +0.000017] ? drm_sched_entity_flush+0x6cb/0x7a0 [gpu_sched] [ +0.000019] ? srso_return_thunk+0x5/0x5f [ +0.000015] ? kasan_complete_mode_report_info+0x72/0x200 [ +0.000016] ? drm_sched_entity_flush+0x6cb/0x7a0 [gpu_sched] [ +0.000019] kasan_report+0xbe/0x110 [ +0.000015] ? drm_sched_entity_flush+0x6cb/0x7a0 [gpu_sched] [ +0.000023] __asan_report_load8_noabort+0x14/0x30 [ +0.000014] drm_sched_entity_flush+0x6cb/0x7a0 [gpu_sched] [ +0.00", - "vulnerability_id": "CVE-2024-56551", - "name": "CVE-2024-56551", - "package_name": "linux-libc-dev", - "package_details": { - "file_path": null, - "name": "linux-libc-dev", - "package_manager": "OS", - "version": "5.4.0", - "release": "216.236" - }, - "remediation": { - "recommendation": { - "text": "None Provided" - } - }, - "cvss_v3_score": 7.8, - "cvss_v30_score": 0.0, - "cvss_v31_score": 7.8, - "cvss_v2_score": 0.0, - "cvss_v3_severity": "HIGH", - "source_url": "https://people.canonical.com/~ubuntu-security/cve/2024/CVE-2024-56551.html", - "source": "UBUNTU_CVE", - "severity": "HIGH", - "status": "ACTIVE", - "title": "CVE-2024-56551 - linux-libc-dev", - "reason_to_ignore": "N/A" + "description": "In the Linux kernel, the following vulnerability has been resolved: smb: client: fix potential deadlock when releasing mids All release_mid() callers seem to hold a reference of @mid so there is no need to call kref_put(&mid->refcount, __release_mid) under @server->mid_lock spinlock. If they don't, then an use-after-free bug would have occurred anyways. By getting rid of such spinlock also fixes a potential deadlock as shown below CPU 0 CPU 1 ------------------------------------------------------------------ cifs_demultiplex_thread() cifs_debug_data_proc_show() release_mid() spin_lock(&server->mid_lock); spin_lock(&cifs_tcp_ses_lock) spin_lock(&server->mid_lock) __release_mid() smb2_find_smb_tcon() spin_lock(&cifs_tcp_ses_lock) deadlock", + "vulnerability_id": "CVE-2023-52757", + "name": "CVE-2023-52757", + "package_name": "linux-libc-dev", + "package_details": { + "file_path": null, + "name": "linux-libc-dev", + "package_manager": "OS", + "version": "5.4.0", + "release": "216.236" + }, + "remediation": {"recommendation": {"text": "None Provided"}}, + "cvss_v3_score": 7.8, + "cvss_v30_score": 0.0, + "cvss_v31_score": 7.8, + "cvss_v2_score": 0.0, + "cvss_v3_severity": "HIGH", + "source_url": "https://people.canonical.com/~ubuntu-security/cve/2023/CVE-2023-52757.html", + "source": "UBUNTU_CVE", + "severity": "HIGH", + "status": "ACTIVE", + "title": "CVE-2023-52757 - linux-libc-dev", + "reason_to_ignore": "N/A" }, { - "description": "In the Linux kernel, the following vulnerability has been resolved: of: module: add buffer overflow check in of_modalias() In of_modalias(), if the buffer happens to be too small even for the 1st snprintf() call, the len parameter will become negative and str parameter (if not NULL initially) will point beyond the buffer's end. Add the buffer overflow check after the 1st snprintf() call and fix such check after the strlen() call (accounting for the terminating NUL char).", - "vulnerability_id": "CVE-2024-38541", - "name": "CVE-2024-38541", - "package_name": "linux-libc-dev", - "package_details": { - "file_path": null, - "name": "linux-libc-dev", - "package_manager": "OS", - "version": "5.4.0", - "release": "216.236" - }, - "remediation": { - "recommendation": { - "text": "None Provided" - } - }, - "cvss_v3_score": 9.8, - "cvss_v30_score": 0.0, - "cvss_v31_score": 9.8, - "cvss_v2_score": 0.0, - "cvss_v3_severity": "CRITICAL", - "source_url": "https://people.canonical.com/~ubuntu-security/cve/2024/CVE-2024-38541.html", - "source": "UBUNTU_CVE", - "severity": "CRITICAL", - "status": "ACTIVE", - "title": "CVE-2024-38541 - linux-libc-dev", - "reason_to_ignore": "N/A" + "description": "In the Linux kernel, the following vulnerability has been resolved: tracing: Fix use-after-free in print_graph_function_flags during tracer switching Kairui reported a UAF issue in print_graph_function_flags() during ftrace stress testing [1]. This issue can be reproduced if puting a 'mdelay(10)' after 'mutex_unlock(&trace_types_lock)' in s_start(), and executing the following script: $ echo function_graph > current_tracer $ cat trace > /dev/null & $ sleep 5 # Ensure the 'cat' reaches the 'mdelay(10)' point $ echo timerlat > current_tracer The root cause lies in the two calls to print_graph_function_flags within print_trace_line during each s_show(): * One through 'iter->trace->print_line()'; * Another through 'event->funcs->trace()', which is hidden in print_trace_fmt() before print_trace_line returns. Tracer switching only updates the former, while the latter continues to use the print_line function of the old tracer, which in the script above is print_graph_function_flags. Moreover, when switching from the", + "vulnerability_id": "CVE-2025-22035", + "name": "CVE-2025-22035", + "package_name": "linux-libc-dev", + "package_details": { + "file_path": null, + "name": "linux-libc-dev", + "package_manager": "OS", + "version": "5.4.0", + "release": "216.236" + }, + "remediation": {"recommendation": {"text": "None Provided"}}, + "cvss_v3_score": 7.8, + "cvss_v30_score": 0.0, + "cvss_v31_score": 7.8, + "cvss_v2_score": 0.0, + "cvss_v3_severity": "HIGH", + "source_url": "https://people.canonical.com/~ubuntu-security/cve/2025/CVE-2025-22035.html", + "source": "UBUNTU_CVE", + "severity": "HIGH", + "status": "ACTIVE", + "title": "CVE-2025-22035 - linux-libc-dev", + "reason_to_ignore": "N/A" }, { - "description": "In the Linux kernel, the following vulnerability has been resolved: net: atm: fix use after free in lec_send() The ->send() operation frees skb so save the length before calling ->send() to avoid a use after free.", - "vulnerability_id": "CVE-2025-22004", - "name": "CVE-2025-22004", - "package_name": "linux-libc-dev", - "package_details": { - "file_path": null, - "name": "linux-libc-dev", - "package_manager": "OS", - "version": "5.4.0", - "release": "216.236" - }, - "remediation": { - "recommendation": { - "text": "None Provided" - } - }, - "cvss_v3_score": 7.8, - "cvss_v30_score": 0.0, - "cvss_v31_score": 7.8, - "cvss_v2_score": 0.0, - "cvss_v3_severity": "HIGH", - "source_url": "https://people.canonical.com/~ubuntu-security/cve/2025/CVE-2025-22004.html", - "source": "UBUNTU_CVE", - "severity": "HIGH", - "status": "ACTIVE", - "title": "CVE-2025-22004 - linux-libc-dev", - "reason_to_ignore": "N/A" + "description": "In the Linux kernel, the following vulnerability has been resolved: Bluetooth: L2CAP: Fix uaf in l2cap_connect [Syzbot reported] BUG: KASAN: slab-use-after-free in l2cap_connect.constprop.0+0x10d8/0x1270 net/bluetooth/l2cap_core.c:3949 Read of size 8 at addr ffff8880241e9800 by task kworker/u9:0/54", + "vulnerability_id": "CVE-2024-49950", + "name": "CVE-2024-49950", + "package_name": "linux-libc-dev", + "package_details": { + "file_path": null, + "name": "linux-libc-dev", + "package_manager": "OS", + "version": "5.4.0", + "release": "216.236" + }, + "remediation": {"recommendation": {"text": "None Provided"}}, + "cvss_v3_score": 7.8, + "cvss_v30_score": 0.0, + "cvss_v31_score": 7.8, + "cvss_v2_score": 0.0, + "cvss_v3_severity": "HIGH", + "source_url": "https://people.canonical.com/~ubuntu-security/cve/2024/CVE-2024-49950.html", + "source": "UBUNTU_CVE", + "severity": "HIGH", + "status": "ACTIVE", + "title": "CVE-2024-49950 - linux-libc-dev", + "reason_to_ignore": "N/A" }, { - "description": "In the Linux kernel, the following vulnerability has been resolved: scsi: iscsi_tcp: Fix UAF during logout when accessing the shost ipaddress Bug report and analysis from Ding Hui. During iSCSI session logout, if another task accesses the shost ipaddress attr, we can get a KASAN UAF report like this: [ 276.942144] BUG: KASAN: use-after-free in _raw_spin_lock_bh+0x78/0xe0 [ 276.942535] Write of size 4 at addr ffff8881053b45b8 by task cat/4088 [ 276.943511] CPU: 2 PID: 4088 Comm: cat Tainted: G E 6.1.0-rc8+ #3 [ 276.943997] Hardware name: VMware, Inc. VMware Virtual Platform/440BX Desktop Reference Platform, BIOS 6.00 11/12/2020 [ 276.944470] Call Trace: [ 276.944943] [ 276.945397] dump_stack_lvl+0x34/0x48 [ 276.945887] print_address_description.constprop.0+0x86/0x1e7 [ 276.946421] print_report+0x36/0x4f [ 276.947358] kasan_report+0xad/0x130 [ 276.948234] kasan_check_range+0x35/0x1c0 [ 276.948674] _raw_spin_lock_bh+0x78/0xe0 [ 276.949989] iscsi_sw_tcp_host_get_param+0xad/0x2e0 [iscsi_tcp] [ 276.951765] s", - "vulnerability_id": "CVE-2023-52975", - "name": "CVE-2023-52975", - "package_name": "linux-libc-dev", - "package_details": { - "file_path": null, - "name": "linux-libc-dev", - "package_manager": "OS", - "version": "5.4.0", - "release": "216.236" - }, - "remediation": { - "recommendation": { - "text": "None Provided" - } - }, - "cvss_v3_score": 7.8, - "cvss_v30_score": 0.0, - "cvss_v31_score": 7.8, - "cvss_v2_score": 0.0, - "cvss_v3_severity": "HIGH", - "source_url": "https://people.canonical.com/~ubuntu-security/cve/2023/CVE-2023-52975.html", - "source": "UBUNTU_CVE", - "severity": "HIGH", - "status": "ACTIVE", - "title": "CVE-2023-52975 - linux-libc-dev", - "reason_to_ignore": "N/A" + "description": "In the Linux kernel, the following vulnerability has been resolved: smb: client: fix UAF in async decryption Doing an async decryption (large read) crashes with a slab-use-after-free way down in the crypto API. Reproducer: # mount.cifs -o ...,seal,esize=1 //srv/share /mnt # dd if=/mnt/largefile of=/dev/null", + "vulnerability_id": "CVE-2024-50047", + "name": "CVE-2024-50047", + "package_name": "linux-libc-dev", + "package_details": { + "file_path": null, + "name": "linux-libc-dev", + "package_manager": "OS", + "version": "5.4.0", + "release": "216.236" + }, + "remediation": {"recommendation": {"text": "None Provided"}}, + "cvss_v3_score": 7.8, + "cvss_v30_score": 0.0, + "cvss_v31_score": 7.8, + "cvss_v2_score": 0.0, + "cvss_v3_severity": "HIGH", + "source_url": "https://people.canonical.com/~ubuntu-security/cve/2024/CVE-2024-50047.html", + "source": "UBUNTU_CVE", + "severity": "HIGH", + "status": "ACTIVE", + "title": "CVE-2024-50047 - linux-libc-dev", + "reason_to_ignore": "N/A" }, { - "description": "In the Linux kernel, the following vulnerability has been resolved: tty: n_gsm: Fix use-after-free in gsm_cleanup_mux BUG: KASAN: slab-use-after-free in gsm_cleanup_mux+0x77b/0x7b0 drivers/tty/n_gsm.c:3160 [n_gsm] Read of size 8 at addr ffff88815fe99c00 by task poc/3379 CPU: 0 UID: 0 PID: 3379 Comm: poc Not tainted 6.11.0+ #56 Hardware name: VMware, Inc. VMware Virtual Platform/440BX Desktop Reference Platform, BIOS 6.00 11/12/2020 Call Trace: gsm_cleanup_mux+0x77b/0x7b0 drivers/tty/n_gsm.c:3160 [n_gsm] __pfx_gsm_cleanup_mux+0x10/0x10 drivers/tty/n_gsm.c:3124 [n_gsm] __pfx_sched_clock_cpu+0x10/0x10 kernel/sched/clock.c:389 update_load_avg+0x1c1/0x27b0 kernel/sched/fair.c:4500 __pfx_min_vruntime_cb_rotate+0x10/0x10 kernel/sched/fair.c:846 __rb_insert_augmented+0x492/0xbf0 lib/rbtree.c:161 gsmld_ioctl+0x395/0x1450 drivers/tty/n_gsm.c:3408 [n_gsm] _raw_spin_lock_irqsave+0x92/0xf0 arch/x86/include/asm/atomic.h:107 __pfx_gsmld_ioctl+0x10/0x10 drivers/tty/n_gsm.c:3822 [n_gsm] ktime_get+0x5e/0x140 kernel/time", - "vulnerability_id": "CVE-2024-50073", - "name": "CVE-2024-50073", - "package_name": "linux-libc-dev", - "package_details": { - "file_path": null, - "name": "linux-libc-dev", - "package_manager": "OS", - "version": "5.4.0", - "release": "216.236" - }, - "remediation": { - "recommendation": { - "text": "None Provided" - } - }, - "cvss_v3_score": 7.8, - "cvss_v30_score": 0.0, - "cvss_v31_score": 7.8, - "cvss_v2_score": 0.0, - "cvss_v3_severity": "HIGH", - "source_url": "https://people.canonical.com/~ubuntu-security/cve/2024/CVE-2024-50073.html", - "source": "UBUNTU_CVE", - "severity": "HIGH", - "status": "ACTIVE", - "title": "CVE-2024-50073 - linux-libc-dev", - "reason_to_ignore": "N/A" + "description": "In the Linux kernel, the following vulnerability has been resolved: iscsi_ibft: Fix UBSAN shift-out-of-bounds warning in ibft_attr_show_nic() When performing an iSCSI boot using IPv6, iscsistart still reads the /sys/firmware/ibft/ethernetX/subnet-mask entry. Since the IPv6 prefix length is 64, this causes the shift exponent to become negative, triggering a UBSAN warning. As the concept of a subnet mask does not apply to IPv6, the value is set to ~0 to suppress the warning message.", + "vulnerability_id": "CVE-2025-21993", + "name": "CVE-2025-21993", + "package_name": "linux-libc-dev", + "package_details": { + "file_path": null, + "name": "linux-libc-dev", + "package_manager": "OS", + "version": "5.4.0", + "release": "216.236" + }, + "remediation": {"recommendation": {"text": "None Provided"}}, + "cvss_v3_score": 7.1, + "cvss_v30_score": 0.0, + "cvss_v31_score": 7.1, + "cvss_v2_score": 0.0, + "cvss_v3_severity": "HIGH", + "source_url": "https://people.canonical.com/~ubuntu-security/cve/2025/CVE-2025-21993.html", + "source": "UBUNTU_CVE", + "severity": "HIGH", + "status": "ACTIVE", + "title": "CVE-2025-21993 - linux-libc-dev", + "reason_to_ignore": "N/A" + }, + { + "description": "In the Linux kernel, the following vulnerability has been resolved: memstick: rtsx_usb_ms: Fix slab-use-after-free in rtsx_usb_ms_drv_remove This fixes the following crash: ================================================================== BUG: KASAN: slab-use-after-free in rtsx_usb_ms_poll_card+0x159/0x200 [rtsx_usb_ms] Read of size 8 at addr ffff888136335380 by task kworker/6:0/140241", + "vulnerability_id": "CVE-2025-22020", + "name": "CVE-2025-22020", + "package_name": "linux-libc-dev", + "package_details": { + "file_path": null, + "name": "linux-libc-dev", + "package_manager": "OS", + "version": "5.4.0", + "release": "216.236" + }, + "remediation": {"recommendation": {"text": "None Provided"}}, + "cvss_v3_score": 7.8, + "cvss_v30_score": 0.0, + "cvss_v31_score": 7.8, + "cvss_v2_score": 0.0, + "cvss_v3_severity": "HIGH", + "source_url": "https://people.canonical.com/~ubuntu-security/cve/2025/CVE-2025-22020.html", + "source": "UBUNTU_CVE", + "severity": "HIGH", + "status": "ACTIVE", + "title": "CVE-2025-22020 - linux-libc-dev", + "reason_to_ignore": "N/A" + }, + { + "description": "In the Linux kernel, the following vulnerability has been resolved: x86/microcode/AMD: Fix out-of-bounds on systems with CPU-less NUMA nodes Currently, load_microcode_amd() iterates over all NUMA nodes, retrieves their CPU masks and unconditionally accesses per-CPU data for the first CPU of each mask. According to Documentation/admin-guide/mm/numaperf.rst: "Some memory may share the same node as a CPU, and others are provided as memory only nodes." Therefore, some node CPU masks may be empty and wouldn't have a "first CPU". On a machine with far memory (and therefore CPU-less NUMA nodes): - cpumask_of_node(nid) is 0 - cpumask_first(0) is CONFIG_NR_CPUS - cpu_data(CONFIG_NR_CPUS) accesses the cpu_info per-CPU array at an index that is 1 out of bounds", + "vulnerability_id": "CVE-2025-21991", + "name": "CVE-2025-21991", + "package_name": "linux-libc-dev", + "package_details": { + "file_path":null, + "name": "linux-libc-dev", + "package_manager": "OS", + "version": "5.4.0", + "release": "216.236" + }, + "remediation": {"recommendation": {"text": "None Provided"}}, + "cvss_v3_score": 7.8, + "cvss_v30_score": 0.0, + "cvss_v31_score": 7.8, + "cvss_v2_score": 0.0, + "cvss_v3_severity": "HIGH", + "source_url": "https://people.canonical.com/~ubuntu-security/cve/2025/CVE-2025-21991.html", + "source": "UBUNTU_CVE", + "severity": "HIGH", + "status": "ACTIVE", + "title": "CVE-2025-21991 - linux-libc-dev", + "reason_to_ignore": "N/A" + }, + { + "description": "In the Linux kernel, the following vulnerability has been resolved: jfs: fix slab-out-of-bounds read in ea_get() During the "size_check" label in ea_get(), the code checks if the extended attribute list (xattr) size matches ea_size. If not, it logs "ea_get: invalid extended attribute" and calls print_hex_dump(). Here, EALIST_SIZE(ea_buf->xattr) returns 4110417968, which exceeds INT_MAX (2,147,483,647). Then ea_size is clamped: int size = clamp_t(int, ea_size, 0, EALIST_SIZE(ea_buf->xattr)); Although clamp_t aims to bound ea_size between 0 and 4110417968, the upper limit is treated as an int, causing an overflow above 2^31 - 1.", + "vulnerability_id": "CVE-2025-39735", + "name": "CVE-2025-39735", + "package_name": "linux-libc-dev", + "package_details": { + "file_path": null, + "name": "linux-libc-dev", + "package_manager": "OS", + "version": "5.4.0", + "release": "216.236" + }, + "remediation": {"recommendation": {"text": "None Provided"}}, + "cvss_v3_score": 7.1, + "cvss_v30_score": 0.0, + "cvss_v31_score": 7.1, + "cvss_v2_score": 0.0, + "cvss_v3_severity": "HIGH", + "source_url": "https://people.canonical.com/~ubuntu-security/cve/2025/CVE-2025-39735.html", + "source": "UBUNTU_CVE", + "severity": "HIGH", + "status": "ACTIVE", + "title": "CVE-2025-39735 - linux-libc-dev", + "reason_to_ignore": "N/A" } ] -} +} \ No newline at end of file diff --git a/tensorflow/inference/docker/2.18/py3/Dockerfile.sagemaker.arm64.cpu.os_scan_allowlist.json b/tensorflow/inference/docker/2.18/py3/Dockerfile.sagemaker.arm64.cpu.os_scan_allowlist.json index b75e3016e8c8..82eceaeb893c 100644 --- a/tensorflow/inference/docker/2.18/py3/Dockerfile.sagemaker.arm64.cpu.os_scan_allowlist.json +++ b/tensorflow/inference/docker/2.18/py3/Dockerfile.sagemaker.arm64.cpu.os_scan_allowlist.json @@ -382,178 +382,229 @@ ], "linux-libc-dev": [ { - "description": "In the Linux kernel, the following vulnerability has been resolved: memstick: rtsx_usb_ms: Fix slab-use-after-free in rtsx_usb_ms_drv_remove This fixes the following crash: ================================================================== BUG: KASAN: slab-use-after-free in rtsx_usb_ms_poll_card+0x159/0x200 [rtsx_usb_ms] Read of size 8 at addr ffff888136335380 by task *******/6:0/140241 CPU: 6 UID: 0 PID: 140241 Comm: *******/6:0 Kdump: loaded Tainted: G E 6.14.0-rc6+ #1 Tainted: [E]=UNSIGNED_MODULE Hardware name: LENOVO 30FNA1V7CW/1057, BIOS S0EKT54A 07/01/2024 Workqueue: events rtsx_usb_ms_poll_card [rtsx_usb_ms] Call Trace: dump_stack_lvl+0x51/0x70 print_address_description.constprop.0+0x27/0x320 ? rtsx_usb_ms_poll_card+0x159/0x200 [rtsx_usb_ms] print_report+0x3e/0x70 kasan_report+0xab/0xe0 ? rtsx_usb_ms_poll_card+0x159/0x200 [rtsx_usb_ms] rtsx_usb_ms_poll_card+0x159/0x200 [rtsx_usb_ms] ? __pfx_rtsx_usb_ms_poll_card+0x10/0x10 [rtsx_usb_ms] ? __pfx___schedule+0x10/0x10 ? kick_pool+0x3b/0x270 process_", - "vulnerability_id": "CVE-2025-22020", - "name": "CVE-2025-22020", - "package_name": "linux-libc-dev", - "package_details": { - "file_path": null, - "name": "linux-libc-dev", - "package_manager": "OS", - "version": "5.4.0", - "release": "216.236" - }, - "remediation": { - "recommendation": { - "text": "None Provided" - } - }, - "cvss_v3_score": 7.8, - "cvss_v30_score": 0.0, - "cvss_v31_score": 7.8, - "cvss_v2_score": 0.0, - "cvss_v3_severity": "HIGH", - "source_url": "https://people.canonical.com/~ubuntu-security/cve/2025/CVE-2025-22020.html", - "source": "UBUNTU_CVE", - "severity": "HIGH", - "status": "ACTIVE", - "title": "CVE-2025-22020 - linux-libc-dev", - "reason_to_ignore": "N/A" + "description": "In the Linux kernel, the following vulnerability has been resolved: sunrpc: fix one UAF issue caused by sunrpc kernel tcp socket BUG: KASAN: slab-use-after-free in tcp_write_timer_handler+0x156/0x3e0 Read of size 1 at addr ffff888111f322cd by task swapper/0/0 CPU: 0 UID: 0 PID: 0 Comm: swapper/0 Not tainted 6.12.0-rc4-dirty #7 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.15.0-1 Call Trace: dump_stack_lvl+0x68/0xa0 print_address_description.constprop.0+0x2c/0x3d0 print_report+0xb4/0x270 kasan_report+0xbd/0xf0 tcp_write_timer_handler+0x156/0x3e0 tcp_write_timer+0x66/0x170 call_timer_fn+0xfb/0x1d0 __run_timers+0x3f8/0x480 run_timer_softirq+0x9b/0x100 handle_softirqs+0x153/0x390 __irq_exit_rcu+0x103/0x120 irq_exit_rcu+0xe/0x20 sysvec_apic_timer_interrupt+0x76/0x90 asm_sysvec_apic_timer_interrupt+0x1a/0x20 RIP: 0010:default_idle+0xf/0x20 Code: 4c 01 c7 4c 29 c2 e9 72 ff ff ff 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 f3 0f 1e fa 66 90 0f 00 2d 33 f8 25 00 fb f4 c3 cc", + "vulnerability_id": "CVE-2024-53168", + "name": "CVE-2024-53168", + "package_name": "linux-libc-dev", + "package_details": { + "file_path": null, + "name": "linux-libc-dev", + "package_manager": "OS", + "version": "5.4.0", + "release": "216.236" + }, + "remediation": {"recommendation": {"text": "None Provided"}}, + "cvss_v3_score": 7.8, + "cvss_v30_score": 0.0, + "cvss_v31_score": 7.8, + "cvss_v2_score": 0.0, + "cvss_v3_severity": "HIGH", + "source_url": "https://people.canonical.com/~ubuntu-security/cve/2024/CVE-2024-53168.html", + "source": "UBUNTU_CVE", + "severity": "HIGH", + "status": "ACTIVE", + "title": "CVE-2024-53168 - linux-libc-dev", + "reason_to_ignore": "N/A" }, { - "description": "In the Linux kernel, the following vulnerability has been resolved: drm/amdgpu: fix usage slab after free [ +0.000021] BUG: KASAN: slab-use-after-free in drm_sched_entity_flush+0x6cb/0x7a0 [gpu_sched] [ +0.000027] Read of size 8 at addr ffff8881b8605f88 by task amd_pci_unplug/2147 [ +0.000023] CPU: 6 PID: 2147 Comm: amd_pci_unplug Not tainted 6.10.0+ #1 [ +0.000016] Hardware name: ASUS System Product Name/ROG STRIX B550-F GAMING (WI-FI), BIOS 1401 12/03/2020 [ +0.000016] Call Trace: [ +0.000008] [ +0.000009] dump_stack_lvl+0x76/0xa0 [ +0.000017] print_report+0xce/0x5f0 [ +0.000017] ? drm_sched_entity_flush+0x6cb/0x7a0 [gpu_sched] [ +0.000019] ? srso_return_thunk+0x5/0x5f [ +0.000015] ? kasan_complete_mode_report_info+0x72/0x200 [ +0.000016] ? drm_sched_entity_flush+0x6cb/0x7a0 [gpu_sched] [ +0.000019] kasan_report+0xbe/0x110 [ +0.000015] ? drm_sched_entity_flush+0x6cb/0x7a0 [gpu_sched] [ +0.000023] __asan_report_load8_noabort+0x14/0x30 [ +0.000014] drm_sched_entity_flush+0x6cb/0x7a0 [gpu_sched] [ +0.00", - "vulnerability_id": "CVE-2024-56551", - "name": "CVE-2024-56551", - "package_name": "linux-libc-dev", - "package_details": { - "file_path": null, - "name": "linux-libc-dev", - "package_manager": "OS", - "version": "5.4.0", - "release": "216.236" - }, - "remediation": { - "recommendation": { - "text": "None Provided" - } - }, - "cvss_v3_score": 7.8, - "cvss_v30_score": 0.0, - "cvss_v31_score": 7.8, - "cvss_v2_score": 0.0, - "cvss_v3_severity": "HIGH", - "source_url": "https://people.canonical.com/~ubuntu-security/cve/2024/CVE-2024-56551.html", - "source": "UBUNTU_CVE", - "severity": "HIGH", - "status": "ACTIVE", - "title": "CVE-2024-56551 - linux-libc-dev", - "reason_to_ignore": "N/A" + "description": "In the Linux kernel, the following vulnerability has been resolved: smb: client: fix potential deadlock when releasing mids All release_mid() callers seem to hold a reference of @mid so there is no need to call kref_put(&mid->refcount, __release_mid) under @server->mid_lock spinlock. If they don't, then an use-after-free bug would have occurred anyways. By getting rid of such spinlock also fixes a potential deadlock as shown below CPU 0 CPU 1 ------------------------------------------------------------------ cifs_demultiplex_thread() cifs_debug_data_proc_show() release_mid() spin_lock(&server->mid_lock); spin_lock(&cifs_tcp_ses_lock) spin_lock(&server->mid_lock) __release_mid() smb2_find_smb_tcon() spin_lock(&cifs_tcp_ses_lock) deadlock", + "vulnerability_id": "CVE-2023-52757", + "name": "CVE-2023-52757", + "package_name": "linux-libc-dev", + "package_details": { + "file_path": null, + "name": "linux-libc-dev", + "package_manager": "OS", + "version": "5.4.0", + "release": "216.236" + }, + "remediation": {"recommendation": {"text": "None Provided"}}, + "cvss_v3_score": 7.8, + "cvss_v30_score": 0.0, + "cvss_v31_score": 7.8, + "cvss_v2_score": 0.0, + "cvss_v3_severity": "HIGH", + "source_url": "https://people.canonical.com/~ubuntu-security/cve/2023/CVE-2023-52757.html", + "source": "UBUNTU_CVE", + "severity": "HIGH", + "status": "ACTIVE", + "title": "CVE-2023-52757 - linux-libc-dev", + "reason_to_ignore": "N/A" }, { - "description": "In the Linux kernel, the following vulnerability has been resolved: of: module: add buffer overflow check in of_modalias() In of_modalias(), if the buffer happens to be too small even for the 1st snprintf() call, the len parameter will become negative and str parameter (if not NULL initially) will point beyond the buffer's end. Add the buffer overflow check after the 1st snprintf() call and fix such check after the strlen() call (accounting for the terminating NUL char).", - "vulnerability_id": "CVE-2024-38541", - "name": "CVE-2024-38541", - "package_name": "linux-libc-dev", - "package_details": { - "file_path": null, - "name": "linux-libc-dev", - "package_manager": "OS", - "version": "5.4.0", - "release": "216.236" - }, - "remediation": { - "recommendation": { - "text": "None Provided" - } - }, - "cvss_v3_score": 9.8, - "cvss_v30_score": 0.0, - "cvss_v31_score": 9.8, - "cvss_v2_score": 0.0, - "cvss_v3_severity": "CRITICAL", - "source_url": "https://people.canonical.com/~ubuntu-security/cve/2024/CVE-2024-38541.html", - "source": "UBUNTU_CVE", - "severity": "CRITICAL", - "status": "ACTIVE", - "title": "CVE-2024-38541 - linux-libc-dev", - "reason_to_ignore": "N/A" + "description": "In the Linux kernel, the following vulnerability has been resolved: tracing: Fix use-after-free in print_graph_function_flags during tracer switching Kairui reported a UAF issue in print_graph_function_flags() during ftrace stress testing [1]. This issue can be reproduced if puting a 'mdelay(10)' after 'mutex_unlock(&trace_types_lock)' in s_start(), and executing the following script: $ echo function_graph > current_tracer $ cat trace > /dev/null & $ sleep 5 # Ensure the 'cat' reaches the 'mdelay(10)' point $ echo timerlat > current_tracer The root cause lies in the two calls to print_graph_function_flags within print_trace_line during each s_show(): * One through 'iter->trace->print_line()'; * Another through 'event->funcs->trace()', which is hidden in print_trace_fmt() before print_trace_line returns. Tracer switching only updates the former, while the latter continues to use the print_line function of the old tracer, which in the script above is print_graph_function_flags. Moreover, when switching from the", + "vulnerability_id": "CVE-2025-22035", + "name": "CVE-2025-22035", + "package_name": "linux-libc-dev", + "package_details": { + "file_path": null, + "name": "linux-libc-dev", + "package_manager": "OS", + "version": "5.4.0", + "release": "216.236" + }, + "remediation": {"recommendation": {"text": "None Provided"}}, + "cvss_v3_score": 7.8, + "cvss_v30_score": 0.0, + "cvss_v31_score": 7.8, + "cvss_v2_score": 0.0, + "cvss_v3_severity": "HIGH", + "source_url": "https://people.canonical.com/~ubuntu-security/cve/2025/CVE-2025-22035.html", + "source": "UBUNTU_CVE", + "severity": "HIGH", + "status": "ACTIVE", + "title": "CVE-2025-22035 - linux-libc-dev", + "reason_to_ignore": "N/A" }, { - "description": "In the Linux kernel, the following vulnerability has been resolved: net: atm: fix use after free in lec_send() The ->send() operation frees skb so save the length before calling ->send() to avoid a use after free.", - "vulnerability_id": "CVE-2025-22004", - "name": "CVE-2025-22004", - "package_name": "linux-libc-dev", - "package_details": { - "file_path": null, - "name": "linux-libc-dev", - "package_manager": "OS", - "version": "5.4.0", - "release": "216.236" - }, - "remediation": { - "recommendation": { - "text": "None Provided" - } - }, - "cvss_v3_score": 7.8, - "cvss_v30_score": 0.0, - "cvss_v31_score": 7.8, - "cvss_v2_score": 0.0, - "cvss_v3_severity": "HIGH", - "source_url": "https://people.canonical.com/~ubuntu-security/cve/2025/CVE-2025-22004.html", - "source": "UBUNTU_CVE", - "severity": "HIGH", - "status": "ACTIVE", - "title": "CVE-2025-22004 - linux-libc-dev", - "reason_to_ignore": "N/A" + "description": "In the Linux kernel, the following vulnerability has been resolved: Bluetooth: L2CAP: Fix uaf in l2cap_connect [Syzbot reported] BUG: KASAN: slab-use-after-free in l2cap_connect.constprop.0+0x10d8/0x1270 net/bluetooth/l2cap_core.c:3949 Read of size 8 at addr ffff8880241e9800 by task kworker/u9:0/54", + "vulnerability_id": "CVE-2024-49950", + "name": "CVE-2024-49950", + "package_name": "linux-libc-dev", + "package_details": { + "file_path": null, + "name": "linux-libc-dev", + "package_manager": "OS", + "version": "5.4.0", + "release": "216.236" + }, + "remediation": {"recommendation": {"text": "None Provided"}}, + "cvss_v3_score": 7.8, + "cvss_v30_score": 0.0, + "cvss_v31_score": 7.8, + "cvss_v2_score": 0.0, + "cvss_v3_severity": "HIGH", + "source_url": "https://people.canonical.com/~ubuntu-security/cve/2024/CVE-2024-49950.html", + "source": "UBUNTU_CVE", + "severity": "HIGH", + "status": "ACTIVE", + "title": "CVE-2024-49950 - linux-libc-dev", + "reason_to_ignore": "N/A" }, { - "description": "In the Linux kernel, the following vulnerability has been resolved: scsi: iscsi_tcp: Fix UAF during logout when accessing the shost ipaddress Bug report and analysis from Ding Hui. During iSCSI session logout, if another task accesses the shost ipaddress attr, we can get a KASAN UAF report like this: [ 276.942144] BUG: KASAN: use-after-free in _raw_spin_lock_bh+0x78/0xe0 [ 276.942535] Write of size 4 at addr ffff8881053b45b8 by task cat/4088 [ 276.943511] CPU: 2 PID: 4088 Comm: cat Tainted: G E 6.1.0-rc8+ #3 [ 276.943997] Hardware name: VMware, Inc. VMware Virtual Platform/440BX Desktop Reference Platform, BIOS 6.00 11/12/2020 [ 276.944470] Call Trace: [ 276.944943] [ 276.945397] dump_stack_lvl+0x34/0x48 [ 276.945887] print_address_description.constprop.0+0x86/0x1e7 [ 276.946421] print_report+0x36/0x4f [ 276.947358] kasan_report+0xad/0x130 [ 276.948234] kasan_check_range+0x35/0x1c0 [ 276.948674] _raw_spin_lock_bh+0x78/0xe0 [ 276.949989] iscsi_sw_tcp_host_get_param+0xad/0x2e0 [iscsi_tcp] [ 276.951765] s", - "vulnerability_id": "CVE-2023-52975", - "name": "CVE-2023-52975", - "package_name": "linux-libc-dev", - "package_details": { - "file_path": null, - "name": "linux-libc-dev", - "package_manager": "OS", - "version": "5.4.0", - "release": "216.236" - }, - "remediation": { - "recommendation": { - "text": "None Provided" - } - }, - "cvss_v3_score": 7.8, - "cvss_v30_score": 0.0, - "cvss_v31_score": 7.8, - "cvss_v2_score": 0.0, - "cvss_v3_severity": "HIGH", - "source_url": "https://people.canonical.com/~ubuntu-security/cve/2023/CVE-2023-52975.html", - "source": "UBUNTU_CVE", - "severity": "HIGH", - "status": "ACTIVE", - "title": "CVE-2023-52975 - linux-libc-dev", - "reason_to_ignore": "N/A" + "description": "In the Linux kernel, the following vulnerability has been resolved: smb: client: fix UAF in async decryption Doing an async decryption (large read) crashes with a slab-use-after-free way down in the crypto API. Reproducer: # mount.cifs -o ...,seal,esize=1 //srv/share /mnt # dd if=/mnt/largefile of=/dev/null", + "vulnerability_id": "CVE-2024-50047", + "name": "CVE-2024-50047", + "package_name": "linux-libc-dev", + "package_details": { + "file_path": null, + "name": "linux-libc-dev", + "package_manager": "OS", + "version": "5.4.0", + "release": "216.236" + }, + "remediation": {"recommendation": {"text": "None Provided"}}, + "cvss_v3_score": 7.8, + "cvss_v30_score": 0.0, + "cvss_v31_score": 7.8, + "cvss_v2_score": 0.0, + "cvss_v3_severity": "HIGH", + "source_url": "https://people.canonical.com/~ubuntu-security/cve/2024/CVE-2024-50047.html", + "source": "UBUNTU_CVE", + "severity": "HIGH", + "status": "ACTIVE", + "title": "CVE-2024-50047 - linux-libc-dev", + "reason_to_ignore": "N/A" }, { - "description": "In the Linux kernel, the following vulnerability has been resolved: tty: n_gsm: Fix use-after-free in gsm_cleanup_mux BUG: KASAN: slab-use-after-free in gsm_cleanup_mux+0x77b/0x7b0 drivers/tty/n_gsm.c:3160 [n_gsm] Read of size 8 at addr ffff88815fe99c00 by task poc/3379 CPU: 0 UID: 0 PID: 3379 Comm: poc Not tainted 6.11.0+ #56 Hardware name: VMware, Inc. VMware Virtual Platform/440BX Desktop Reference Platform, BIOS 6.00 11/12/2020 Call Trace: gsm_cleanup_mux+0x77b/0x7b0 drivers/tty/n_gsm.c:3160 [n_gsm] __pfx_gsm_cleanup_mux+0x10/0x10 drivers/tty/n_gsm.c:3124 [n_gsm] __pfx_sched_clock_cpu+0x10/0x10 kernel/sched/clock.c:389 update_load_avg+0x1c1/0x27b0 kernel/sched/fair.c:4500 __pfx_min_vruntime_cb_rotate+0x10/0x10 kernel/sched/fair.c:846 __rb_insert_augmented+0x492/0xbf0 lib/rbtree.c:161 gsmld_ioctl+0x395/0x1450 drivers/tty/n_gsm.c:3408 [n_gsm] _raw_spin_lock_irqsave+0x92/0xf0 arch/x86/include/asm/atomic.h:107 __pfx_gsmld_ioctl+0x10/0x10 drivers/tty/n_gsm.c:3822 [n_gsm] ktime_get+0x5e/0x140 kernel/time", - "vulnerability_id": "CVE-2024-50073", - "name": "CVE-2024-50073", - "package_name": "linux-libc-dev", - "package_details": { - "file_path": null, - "name": "linux-libc-dev", - "package_manager": "OS", - "version": "5.4.0", - "release": "216.236" - }, - "remediation": { - "recommendation": { - "text": "None Provided" - } - }, - "cvss_v3_score": 7.8, - "cvss_v30_score": 0.0, - "cvss_v31_score": 7.8, - "cvss_v2_score": 0.0, - "cvss_v3_severity": "HIGH", - "source_url": "https://people.canonical.com/~ubuntu-security/cve/2024/CVE-2024-50073.html", - "source": "UBUNTU_CVE", - "severity": "HIGH", - "status": "ACTIVE", - "title": "CVE-2024-50073 - linux-libc-dev", - "reason_to_ignore": "N/A" + "description": "In the Linux kernel, the following vulnerability has been resolved: iscsi_ibft: Fix UBSAN shift-out-of-bounds warning in ibft_attr_show_nic() When performing an iSCSI boot using IPv6, iscsistart still reads the /sys/firmware/ibft/ethernetX/subnet-mask entry. Since the IPv6 prefix length is 64, this causes the shift exponent to become negative, triggering a UBSAN warning. As the concept of a subnet mask does not apply to IPv6, the value is set to ~0 to suppress the warning message.", + "vulnerability_id": "CVE-2025-21993", + "name": "CVE-2025-21993", + "package_name": "linux-libc-dev", + "package_details": { + "file_path": null, + "name": "linux-libc-dev", + "package_manager": "OS", + "version": "5.4.0", + "release": "216.236" + }, + "remediation": {"recommendation": {"text": "None Provided"}}, + "cvss_v3_score": 7.1, + "cvss_v30_score": 0.0, + "cvss_v31_score": 7.1, + "cvss_v2_score": 0.0, + "cvss_v3_severity": "HIGH", + "source_url": "https://people.canonical.com/~ubuntu-security/cve/2025/CVE-2025-21993.html", + "source": "UBUNTU_CVE", + "severity": "HIGH", + "status": "ACTIVE", + "title": "CVE-2025-21993 - linux-libc-dev", + "reason_to_ignore": "N/A" + }, + { + "description": "In the Linux kernel, the following vulnerability has been resolved: memstick: rtsx_usb_ms: Fix slab-use-after-free in rtsx_usb_ms_drv_remove This fixes the following crash: ================================================================== BUG: KASAN: slab-use-after-free in rtsx_usb_ms_poll_card+0x159/0x200 [rtsx_usb_ms] Read of size 8 at addr ffff888136335380 by task kworker/6:0/140241", + "vulnerability_id": "CVE-2025-22020", + "name": "CVE-2025-22020", + "package_name": "linux-libc-dev", + "package_details": { + "file_path": null, + "name": "linux-libc-dev", + "package_manager": "OS", + "version": "5.4.0", + "release": "216.236" + }, + "remediation": {"recommendation": {"text": "None Provided"}}, + "cvss_v3_score": 7.8, + "cvss_v30_score": 0.0, + "cvss_v31_score": 7.8, + "cvss_v2_score": 0.0, + "cvss_v3_severity": "HIGH", + "source_url": "https://people.canonical.com/~ubuntu-security/cve/2025/CVE-2025-22020.html", + "source": "UBUNTU_CVE", + "severity": "HIGH", + "status": "ACTIVE", + "title": "CVE-2025-22020 - linux-libc-dev", + "reason_to_ignore": "N/A" + }, + { + "description": "In the Linux kernel, the following vulnerability has been resolved: x86/microcode/AMD: Fix out-of-bounds on systems with CPU-less NUMA nodes Currently, load_microcode_amd() iterates over all NUMA nodes, retrieves their CPU masks and unconditionally accesses per-CPU data for the first CPU of each mask. According to Documentation/admin-guide/mm/numaperf.rst: "Some memory may share the same node as a CPU, and others are provided as memory only nodes." Therefore, some node CPU masks may be empty and wouldn't have a "first CPU". On a machine with far memory (and therefore CPU-less NUMA nodes): - cpumask_of_node(nid) is 0 - cpumask_first(0) is CONFIG_NR_CPUS - cpu_data(CONFIG_NR_CPUS) accesses the cpu_info per-CPU array at an index that is 1 out of bounds", + "vulnerability_id": "CVE-2025-21991", + "name": "CVE-2025-21991", + "package_name": "linux-libc-dev", + "package_details": { + "file_path":null, + "name": "linux-libc-dev", + "package_manager": "OS", + "version": "5.4.0", + "release": "216.236" + }, + "remediation": {"recommendation": {"text": "None Provided"}}, + "cvss_v3_score": 7.8, + "cvss_v30_score": 0.0, + "cvss_v31_score": 7.8, + "cvss_v2_score": 0.0, + "cvss_v3_severity": "HIGH", + "source_url": "https://people.canonical.com/~ubuntu-security/cve/2025/CVE-2025-21991.html", + "source": "UBUNTU_CVE", + "severity": "HIGH", + "status": "ACTIVE", + "title": "CVE-2025-21991 - linux-libc-dev", + "reason_to_ignore": "N/A" + }, + { + "description": "In the Linux kernel, the following vulnerability has been resolved: jfs: fix slab-out-of-bounds read in ea_get() During the "size_check" label in ea_get(), the code checks if the extended attribute list (xattr) size matches ea_size. If not, it logs "ea_get: invalid extended attribute" and calls print_hex_dump(). Here, EALIST_SIZE(ea_buf->xattr) returns 4110417968, which exceeds INT_MAX (2,147,483,647). Then ea_size is clamped: int size = clamp_t(int, ea_size, 0, EALIST_SIZE(ea_buf->xattr)); Although clamp_t aims to bound ea_size between 0 and 4110417968, the upper limit is treated as an int, causing an overflow above 2^31 - 1.", + "vulnerability_id": "CVE-2025-39735", + "name": "CVE-2025-39735", + "package_name": "linux-libc-dev", + "package_details": { + "file_path": null, + "name": "linux-libc-dev", + "package_manager": "OS", + "version": "5.4.0", + "release": "216.236" + }, + "remediation": {"recommendation": {"text": "None Provided"}}, + "cvss_v3_score": 7.1, + "cvss_v30_score": 0.0, + "cvss_v31_score": 7.1, + "cvss_v2_score": 0.0, + "cvss_v3_severity": "HIGH", + "source_url": "https://people.canonical.com/~ubuntu-security/cve/2025/CVE-2025-39735.html", + "source": "UBUNTU_CVE", + "severity": "HIGH", + "status": "ACTIVE", + "title": "CVE-2025-39735 - linux-libc-dev", + "reason_to_ignore": "N/A" } ] } From 616dfdc4c757b19a6b2f13cb1c8136efbf1cde58 Mon Sep 17 00:00:00 2001 From: jkottu Date: Thu, 25 Sep 2025 12:52:19 -0700 Subject: [PATCH 42/49] build tf 2.18 --- tensorflow/training/docker/2.18/py3/Dockerfile.cpu | 2 +- tensorflow/training/docker/2.18/py3/cu125/Dockerfile.gpu | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/tensorflow/training/docker/2.18/py3/Dockerfile.cpu b/tensorflow/training/docker/2.18/py3/Dockerfile.cpu index 04a7eae8d4cc..57bb2594af59 100644 --- a/tensorflow/training/docker/2.18/py3/Dockerfile.cpu +++ b/tensorflow/training/docker/2.18/py3/Dockerfile.cpu @@ -264,7 +264,7 @@ RUN ${PIP} install --no-cache-dir -U \ "tensorflow-datasets==4.9.7" RUN $PYTHON -m pip install --no-cache-dir -U \ - numba==0.61.0 \ + numba \ bokeh \ imageio \ opencv-python \ diff --git a/tensorflow/training/docker/2.18/py3/cu125/Dockerfile.gpu b/tensorflow/training/docker/2.18/py3/cu125/Dockerfile.gpu index c8b17f2ca0e9..1d57bc1710e1 100644 --- a/tensorflow/training/docker/2.18/py3/cu125/Dockerfile.gpu +++ b/tensorflow/training/docker/2.18/py3/cu125/Dockerfile.gpu @@ -367,7 +367,7 @@ RUN ${PIP} install --no-cache-dir -U \ "tensorflow-datasets==4.9.7" RUN $PYTHON -m pip install --no-cache-dir -U \ - numba==0.61.0 \ + numba \ bokeh \ imageio \ opencv-python \ @@ -382,6 +382,7 @@ RUN $PYTHON -m pip install --no-cache-dir -U \ RUN $PYTHON -m pip install --no-cache-dir -U \ sagemaker-experiments==0.1.45 +#pinning old version because of protobuf dependency with tensorflow-metadata RUN $PYTHON -m pip install --no-cache-dir -U \ sagemaker-training==4.8.4 From a32cbbb9f65ecdf0816e61216598f12bd859ce56 Mon Sep 17 00:00:00 2001 From: jkottu Date: Mon, 29 Sep 2025 16:34:46 -0700 Subject: [PATCH 43/49] remove uv changes --- tensorflow/training/docker/2.18/py3/Dockerfile.cpu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/training/docker/2.18/py3/Dockerfile.cpu b/tensorflow/training/docker/2.18/py3/Dockerfile.cpu index 57bb2594af59..04a7eae8d4cc 100644 --- a/tensorflow/training/docker/2.18/py3/Dockerfile.cpu +++ b/tensorflow/training/docker/2.18/py3/Dockerfile.cpu @@ -264,7 +264,7 @@ RUN ${PIP} install --no-cache-dir -U \ "tensorflow-datasets==4.9.7" RUN $PYTHON -m pip install --no-cache-dir -U \ - numba \ + numba==0.61.0 \ bokeh \ imageio \ opencv-python \ From 1f42068bcb688b1733f969a1bd6d3ad8055825e5 Mon Sep 17 00:00:00 2001 From: Jyothirmai Kottu Date: Sun, 28 Sep 2025 20:30:59 -0700 Subject: [PATCH 44/49] fix delimiter (#5311) --- .../py3/Dockerfile.ec2.arm64.cpu.os_scan_allowlist.json | 6 +++--- .../Dockerfile.sagemaker.arm64.cpu.os_scan_allowlist.json | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/tensorflow/inference/docker/2.18/py3/Dockerfile.ec2.arm64.cpu.os_scan_allowlist.json b/tensorflow/inference/docker/2.18/py3/Dockerfile.ec2.arm64.cpu.os_scan_allowlist.json index 92c19e1a1f3b..7e2bbceed8c9 100644 --- a/tensorflow/inference/docker/2.18/py3/Dockerfile.ec2.arm64.cpu.os_scan_allowlist.json +++ b/tensorflow/inference/docker/2.18/py3/Dockerfile.ec2.arm64.cpu.os_scan_allowlist.json @@ -557,7 +557,7 @@ "reason_to_ignore": "N/A" }, { - "description": "In the Linux kernel, the following vulnerability has been resolved: x86/microcode/AMD: Fix out-of-bounds on systems with CPU-less NUMA nodes Currently, load_microcode_amd() iterates over all NUMA nodes, retrieves their CPU masks and unconditionally accesses per-CPU data for the first CPU of each mask. According to Documentation/admin-guide/mm/numaperf.rst: "Some memory may share the same node as a CPU, and others are provided as memory only nodes." Therefore, some node CPU masks may be empty and wouldn't have a "first CPU". On a machine with far memory (and therefore CPU-less NUMA nodes): - cpumask_of_node(nid) is 0 - cpumask_first(0) is CONFIG_NR_CPUS - cpu_data(CONFIG_NR_CPUS) accesses the cpu_info per-CPU array at an index that is 1 out of bounds", + "description": "In the Linux kernel, the following vulnerability has been resolved: x86/microcode/AMD: Fix out-of-bounds on systems with CPU-less NUMA nodes Currently, load_microcode_amd() iterates over all NUMA nodes, retrieves their CPU masks and unconditionally accesses per-CPU data for the first CPU of each mask. According to Documentation/admin-guide/mm/numaperf.rst: ""Some memory may share the same node as a CPU, and others are provided as memory only nodes."" Therefore, some node CPU masks may be empty and wouldn't have a ""first CPU"". On a machine with far memory (and therefore CPU-less NUMA nodes): - cpumask_of_node(nid) is 0 - cpumask_first(0) is CONFIG_NR_CPUS - cpu_data(CONFIG_NR_CPUS) accesses the cpu_info per-CPU array at an index that is 1 out of bounds", "vulnerability_id": "CVE-2025-21991", "name": "CVE-2025-21991", "package_name": "linux-libc-dev", @@ -582,7 +582,7 @@ "reason_to_ignore": "N/A" }, { - "description": "In the Linux kernel, the following vulnerability has been resolved: jfs: fix slab-out-of-bounds read in ea_get() During the "size_check" label in ea_get(), the code checks if the extended attribute list (xattr) size matches ea_size. If not, it logs "ea_get: invalid extended attribute" and calls print_hex_dump(). Here, EALIST_SIZE(ea_buf->xattr) returns 4110417968, which exceeds INT_MAX (2,147,483,647). Then ea_size is clamped: int size = clamp_t(int, ea_size, 0, EALIST_SIZE(ea_buf->xattr)); Although clamp_t aims to bound ea_size between 0 and 4110417968, the upper limit is treated as an int, causing an overflow above 2^31 - 1.", + "description": "In the Linux kernel, the following vulnerability has been resolved: jfs: fix slab-out-of-bounds read in ea_get() During the "size_check" label in ea_get(), the code checks if the extended attribute list (xattr) size matches ea_size. If not, it logs ""ea_get: invalid extended attribute"" and calls print_hex_dump(). Here, EALIST_SIZE(ea_buf->xattr) returns 4110417968, which exceeds INT_MAX (2,147,483,647). Then ea_size is clamped: int size = clamp_t(int, ea_size, 0, EALIST_SIZE(ea_buf->xattr)); Although clamp_t aims to bound ea_size between 0 and 4110417968, the upper limit is treated as an int, causing an overflow above 2^31 - 1.", "vulnerability_id": "CVE-2025-39735", "name": "CVE-2025-39735", "package_name": "linux-libc-dev", @@ -607,4 +607,4 @@ "reason_to_ignore": "N/A" } ] -} \ No newline at end of file +} diff --git a/tensorflow/inference/docker/2.18/py3/Dockerfile.sagemaker.arm64.cpu.os_scan_allowlist.json b/tensorflow/inference/docker/2.18/py3/Dockerfile.sagemaker.arm64.cpu.os_scan_allowlist.json index 82eceaeb893c..a6d9eb7a1c91 100644 --- a/tensorflow/inference/docker/2.18/py3/Dockerfile.sagemaker.arm64.cpu.os_scan_allowlist.json +++ b/tensorflow/inference/docker/2.18/py3/Dockerfile.sagemaker.arm64.cpu.os_scan_allowlist.json @@ -380,7 +380,7 @@ "reason_to_ignore": "N/A" } ], - "linux-libc-dev": [ + "linux-libc-dev": [ { "description": "In the Linux kernel, the following vulnerability has been resolved: sunrpc: fix one UAF issue caused by sunrpc kernel tcp socket BUG: KASAN: slab-use-after-free in tcp_write_timer_handler+0x156/0x3e0 Read of size 1 at addr ffff888111f322cd by task swapper/0/0 CPU: 0 UID: 0 PID: 0 Comm: swapper/0 Not tainted 6.12.0-rc4-dirty #7 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.15.0-1 Call Trace: dump_stack_lvl+0x68/0xa0 print_address_description.constprop.0+0x2c/0x3d0 print_report+0xb4/0x270 kasan_report+0xbd/0xf0 tcp_write_timer_handler+0x156/0x3e0 tcp_write_timer+0x66/0x170 call_timer_fn+0xfb/0x1d0 __run_timers+0x3f8/0x480 run_timer_softirq+0x9b/0x100 handle_softirqs+0x153/0x390 __irq_exit_rcu+0x103/0x120 irq_exit_rcu+0xe/0x20 sysvec_apic_timer_interrupt+0x76/0x90 asm_sysvec_apic_timer_interrupt+0x1a/0x20 RIP: 0010:default_idle+0xf/0x20 Code: 4c 01 c7 4c 29 c2 e9 72 ff ff ff 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 f3 0f 1e fa 66 90 0f 00 2d 33 f8 25 00 fb f4 c3 cc", "vulnerability_id": "CVE-2024-53168", @@ -557,7 +557,7 @@ "reason_to_ignore": "N/A" }, { - "description": "In the Linux kernel, the following vulnerability has been resolved: x86/microcode/AMD: Fix out-of-bounds on systems with CPU-less NUMA nodes Currently, load_microcode_amd() iterates over all NUMA nodes, retrieves their CPU masks and unconditionally accesses per-CPU data for the first CPU of each mask. According to Documentation/admin-guide/mm/numaperf.rst: "Some memory may share the same node as a CPU, and others are provided as memory only nodes." Therefore, some node CPU masks may be empty and wouldn't have a "first CPU". On a machine with far memory (and therefore CPU-less NUMA nodes): - cpumask_of_node(nid) is 0 - cpumask_first(0) is CONFIG_NR_CPUS - cpu_data(CONFIG_NR_CPUS) accesses the cpu_info per-CPU array at an index that is 1 out of bounds", + "description": "In the Linux kernel, the following vulnerability has been resolved: x86/microcode/AMD: Fix out-of-bounds on systems with CPU-less NUMA nodes Currently, load_microcode_amd() iterates over all NUMA nodes, retrieves their CPU masks and unconditionally accesses per-CPU data for the first CPU of each mask. According to Documentation/admin-guide/mm/numaperf.rst: ""Some memory may share the same node as a CPU, and others are provided as memory only nodes."" Therefore, some node CPU masks may be empty and wouldn't have a ""first CPU"". On a machine with far memory (and therefore CPU-less NUMA nodes): - cpumask_of_node(nid) is 0 - cpumask_first(0) is CONFIG_NR_CPUS - cpu_data(CONFIG_NR_CPUS) accesses the cpu_info per-CPU array at an index that is 1 out of bounds", "vulnerability_id": "CVE-2025-21991", "name": "CVE-2025-21991", "package_name": "linux-libc-dev", @@ -582,7 +582,7 @@ "reason_to_ignore": "N/A" }, { - "description": "In the Linux kernel, the following vulnerability has been resolved: jfs: fix slab-out-of-bounds read in ea_get() During the "size_check" label in ea_get(), the code checks if the extended attribute list (xattr) size matches ea_size. If not, it logs "ea_get: invalid extended attribute" and calls print_hex_dump(). Here, EALIST_SIZE(ea_buf->xattr) returns 4110417968, which exceeds INT_MAX (2,147,483,647). Then ea_size is clamped: int size = clamp_t(int, ea_size, 0, EALIST_SIZE(ea_buf->xattr)); Although clamp_t aims to bound ea_size between 0 and 4110417968, the upper limit is treated as an int, causing an overflow above 2^31 - 1.", + "description": "In the Linux kernel, the following vulnerability has been resolved: jfs: fix slab-out-of-bounds read in ea_get() During the "size_check" label in ea_get(), the code checks if the extended attribute list (xattr) size matches ea_size. If not, it logs ""ea_get: invalid extended attribute"" and calls print_hex_dump(). Here, EALIST_SIZE(ea_buf->xattr) returns 4110417968, which exceeds INT_MAX (2,147,483,647). Then ea_size is clamped: int size = clamp_t(int, ea_size, 0, EALIST_SIZE(ea_buf->xattr)); Although clamp_t aims to bound ea_size between 0 and 4110417968, the upper limit is treated as an int, causing an overflow above 2^31 - 1.", "vulnerability_id": "CVE-2025-39735", "name": "CVE-2025-39735", "package_name": "linux-libc-dev", From ff341825fdd6a435516912b5add3c91192345968 Mon Sep 17 00:00:00 2001 From: ethnzhng <26497102+ethnzhng@users.noreply.github.com> Date: Mon, 29 Sep 2025 10:44:38 -0700 Subject: [PATCH 45/49] [djl-serving] Add 0.34.0 lmi image release (#5315) --- release_images_inference.yml | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/release_images_inference.yml b/release_images_inference.yml index 34ab81fe58c5..1502839a989e 100644 --- a/release_images_inference.yml +++ b/release_images_inference.yml @@ -249,3 +249,16 @@ release_images: example: False disable_sm_tag: True force_release: False + 20: + framework: "djl" + version: "0.34.0" + arch_type: "x86" + inference: + device_types: [ "gpu" ] + python_versions: [ "py312" ] + os_version: "ubuntu24.04" + lmi_version: "16.0.0" + cuda_version: "cu128" + example: False + disable_sm_tag: True + force_release: False From ea6f42f6b02173e5728e67a147b2c4f8d7155ad5 Mon Sep 17 00:00:00 2001 From: Sirut Buasai <73297481+sirutBuasai@users.noreply.github.com> Date: Mon, 29 Sep 2025 11:53:57 -0700 Subject: [PATCH 46/49] change codeowners (#5316) --- .github/CODEOWNERS | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index 4e74be27bf7a..6ace62deacab 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -50,10 +50,10 @@ dgl_tests/ @aws/dlc-dgl-reviewers *triton* @aws/dlc-triton-reviewers # Files under stabilityai/ and huggingface/ directories can be directly reviewed by below teams -stabilityai/ @aws/sagemaker-1p-algorithms -huggingface/ @aws/sagemaker-1p-algorithms -test/sagemaker_tests/huggingface/ @aws/sagemaker-1p-algorithms -test/sagemaker_tests/huggingface_pytorch/ @aws/sagemaker-1p-algorithms -test/sagemaker_tests/huggingface_tensorflow/ @aws/sagemaker-1p-algorithms -test/sagemaker_tests/pytorch/inference/integration/sagemaker/test_stabilityai.py @aws/sagemaker-1p-algorithms -test/sagemaker_tests/pytorch/inference/resources/stabilityai/ @aws/sagemaker-1p-algorithms +stabilityai/ @aws/dl-containers +huggingface/ @aws/dl-containers +test/sagemaker_tests/huggingface/ @aws/dl-containers +test/sagemaker_tests/huggingface_pytorch/ @aws/dl-containers +test/sagemaker_tests/huggingface_tensorflow/ @aws/dl-containers +test/sagemaker_tests/pytorch/inference/integration/sagemaker/test_stabilityai.py @aws/dl-containers +test/sagemaker_tests/pytorch/inference/resources/stabilityai/ @aws/dl-containers From 2ca724847eb331e5f64db8238a39bc46c38094e1 Mon Sep 17 00:00:00 2001 From: Ahsan Khan Date: Mon, 29 Sep 2025 12:09:45 -0700 Subject: [PATCH 47/49] fix: update neuronx huggingface tests (#5314) * fix: update neuronx huggingface tests * black formatter * Add inference_ami_version=al2-ami-sagemaker-inference-neuron-2 to all neuronx tests --------- Co-authored-by: Sirut Buasai <73297481+sirutBuasai@users.noreply.github.com> --- .../integration/sagemaker/test_neuronx_decoder_hosting.py | 1 + .../integration/sagemaker/test_neuronx_encoder_hosting.py | 1 + .../inference/integration/sagemaker/test_neuronx_no_context.py | 1 + .../inference/integration/sagemaker/test_neuronx_sdxl_hosting.py | 1 + 4 files changed, 4 insertions(+) diff --git a/test/sagemaker_tests/huggingface/inference/integration/sagemaker/test_neuronx_decoder_hosting.py b/test/sagemaker_tests/huggingface/inference/integration/sagemaker/test_neuronx_decoder_hosting.py index b731d520590e..b668643c3e83 100644 --- a/test/sagemaker_tests/huggingface/inference/integration/sagemaker/test_neuronx_decoder_hosting.py +++ b/test/sagemaker_tests/huggingface/inference/integration/sagemaker/test_neuronx_decoder_hosting.py @@ -145,6 +145,7 @@ def _test_pt_neuronx( initial_instance_count=1, instance_type=instance_type, endpoint_name=endpoint_name, + inference_ami_version="al2-ami-sagemaker-inference-neuron-2", ) data = {"inputs": "I really wish "} diff --git a/test/sagemaker_tests/huggingface/inference/integration/sagemaker/test_neuronx_encoder_hosting.py b/test/sagemaker_tests/huggingface/inference/integration/sagemaker/test_neuronx_encoder_hosting.py index 16ae18326f5c..069feee508b2 100644 --- a/test/sagemaker_tests/huggingface/inference/integration/sagemaker/test_neuronx_encoder_hosting.py +++ b/test/sagemaker_tests/huggingface/inference/integration/sagemaker/test_neuronx_encoder_hosting.py @@ -145,6 +145,7 @@ def _test_pt_neuronx( initial_instance_count=1, instance_type=instance_type, endpoint_name=endpoint_name, + inference_ami_version="al2-ami-sagemaker-inference-neuron-2", ) data = {"inputs": "Hamilton is considered to be the best musical of human history."} diff --git a/test/sagemaker_tests/huggingface/inference/integration/sagemaker/test_neuronx_no_context.py b/test/sagemaker_tests/huggingface/inference/integration/sagemaker/test_neuronx_no_context.py index 1a298087baf2..277d63b2688b 100644 --- a/test/sagemaker_tests/huggingface/inference/integration/sagemaker/test_neuronx_no_context.py +++ b/test/sagemaker_tests/huggingface/inference/integration/sagemaker/test_neuronx_no_context.py @@ -127,6 +127,7 @@ def _test_sentence_transformers( initial_instance_count=1, instance_type=instance_type, endpoint_name=endpoint_name, + inference_ami_version="al2-ami-sagemaker-inference-neuron-2", ) predictor.serializer = IdentitySerializer(content_type="application/json") diff --git a/test/sagemaker_tests/huggingface/inference/integration/sagemaker/test_neuronx_sdxl_hosting.py b/test/sagemaker_tests/huggingface/inference/integration/sagemaker/test_neuronx_sdxl_hosting.py index a29665a48150..5cd464303942 100644 --- a/test/sagemaker_tests/huggingface/inference/integration/sagemaker/test_neuronx_sdxl_hosting.py +++ b/test/sagemaker_tests/huggingface/inference/integration/sagemaker/test_neuronx_sdxl_hosting.py @@ -146,6 +146,7 @@ def _test_pt_neuronx( initial_instance_count=1, instance_type=instance_type, endpoint_name=endpoint_name, + inference_ami_version="al2-ami-sagemaker-inference-neuron-2", ) data = {"inputs": "Astronaut in a jungle, cold color palette, muted colors, detailed, 8k"} From 35b5a83d4b7a055a248f0b22aab2599f4ecfba32 Mon Sep 17 00:00:00 2001 From: Jinyan Li <97153458+jinyan-li1@users.noreply.github.com> Date: Mon, 29 Sep 2025 13:03:07 -0700 Subject: [PATCH 48/49] Allowlisting for PyTorch 2.6 inference ARM image (#5310) * Allowlisting for PyTorch 2.6 inference ARM image --- .../py3/Dockerfile.sagemaker.arm64.cpu.py_scan_allowlist.json | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 pytorch/inference/docker/2.6/py3/Dockerfile.sagemaker.arm64.cpu.py_scan_allowlist.json diff --git a/pytorch/inference/docker/2.6/py3/Dockerfile.sagemaker.arm64.cpu.py_scan_allowlist.json b/pytorch/inference/docker/2.6/py3/Dockerfile.sagemaker.arm64.cpu.py_scan_allowlist.json new file mode 100644 index 000000000000..4882e42c6ceb --- /dev/null +++ b/pytorch/inference/docker/2.6/py3/Dockerfile.sagemaker.arm64.cpu.py_scan_allowlist.json @@ -0,0 +1,3 @@ +{ + "78828": "Affected versions of the PyTorch package are vulnerable to Denial of Service (DoS) due to improper handling in the MKLDNN pooling implementation. The torch.mkldnn_max_pool2d function fails to properly validate input parameters, allowing crafted inputs to trigger resource exhaustion or crashes in the underlying MKLDNN library. An attacker with local access can exploit this vulnerability by passing specially crafted tensor dimensions or parameters to the max pooling function, causing the application to become unresponsive or crash." +} From ff1ed6d091fab760c15fda7457d9c00d4d058606 Mon Sep 17 00:00:00 2001 From: jkottu Date: Mon, 29 Sep 2025 16:38:06 -0700 Subject: [PATCH 49/49] test pytorch 2.8 ec2 --- dlc_developer_config.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dlc_developer_config.toml b/dlc_developer_config.toml index ab12df19dfb6..fa6c1df44f6e 100644 --- a/dlc_developer_config.toml +++ b/dlc_developer_config.toml @@ -122,7 +122,7 @@ use_scheduler = false dlc-pr-base = "" # Standard Framework Training -dlc-pr-pytorch-training = "pytorch/training/buildspec-2-7-ec2.yml" +dlc-pr-pytorch-training = "pytorch/training/buildspec-2-8-ec2.yml" dlc-pr-tensorflow-2-training = "" dlc-pr-autogluon-training = ""