diff --git a/data/common-ecr-scan-allowlist.json b/data/common-ecr-scan-allowlist.json index 3153f937f0c8..d2e390e8a579 100644 --- a/data/common-ecr-scan-allowlist.json +++ b/data/common-ecr-scan-allowlist.json @@ -269,5 +269,65 @@ "title": "CVE-2025-32434 - torch", "reason_to_ignore": "this container is specifically pytorch 2.5.x so we can’t upgrade to 2.6" } + ], + "tar": [ + { + "description": "node-tar,a Tar for Node.js, contains a vulnerability in versions prior to 7.5.7 where the security check for hardlink entries uses different path resolution semantics than the actual hardlink creation logic. This mismatch allows an attacker to craft a malicious TAR archive that bypasses path traversal protections and creates hardlinks to arbitrary files outside the extraction directory. Version 7.5.7 contains a fix for the issue.", + "vulnerability_id": "CVE-2026-24842", + "name": "CVE-2026-24842", + "package_name": "tar", + "package_details": { + "file_path": "/usr/local/lib/python3.12/site-packages/jupyterlab/staging/yarn.lock", + "name": "tar", + "package_manager": "NODE", + "version": "6.1.11", + "release": null + }, + "remediation": { + "recommendation": { + "text": "None Provided" + } + }, + "cvss_v3_score": 8.2, + "cvss_v30_score": 0.0, + "cvss_v31_score": 8.2, + "cvss_v2_score": 0.0, + "cvss_v3_severity": "HIGH", + "source_url": "https://nvd.nist.gov/vuln/detail/CVE-2026-24842", + "source": "NVD", + "severity": "HIGH", + "status": "ACTIVE", + "title": "CVE-2026-24842 - tar", + "reason_to_ignore": "Transitive dependency in jupyterlab staging yarn.lock - not directly exploitable in training container context" + }, + { + "description": "node-tar,a Tar for Node.js, has a race condition vulnerability in versions up to and including 7.5.3. This is due to an incomplete handling of Unicode path collisions in the `path-reservations` system. On case-insensitive or normalization-insensitive filesystems (such as macOS APFS, In which it has been tested), the library fails to lock colliding paths (e.g., `ß` and `ss`), allowing them to be processed in parallel. This bypasses the library's internal concurrency safeguards and permits Symlink Poisoning attacks via race conditions. The library uses a `PathReservations` system to ensure that metadata checks and file operations for the same path are serialized. This prevents race conditions where one entry might clobber another concurrently. This is a Race Condition which enables Arbitrary File Overwrite. This vulnerability affects users and systems using node-tar on macOS (APFS/HFS+). Because of using `NFD` Unicode normalization (in which `ß` and `ss` are different), conflicting paths do not have their order", + "vulnerability_id": "CVE-2026-23950", + "name": "CVE-2026-23950", + "package_name": "tar", + "package_details": { + "file_path": "/usr/local/lib/python3.12/site-packages/jupyterlab/staging/yarn.lock", + "name": "tar", + "package_manager": "NODE", + "version": "6.1.11", + "release": null + }, + "remediation": { + "recommendation": { + "text": "None Provided" + } + }, + "cvss_v3_score": 8.8, + "cvss_v30_score": 0.0, + "cvss_v31_score": 8.8, + "cvss_v2_score": 0.0, + "cvss_v3_severity": "HIGH", + "source_url": "https://nvd.nist.gov/vuln/detail/CVE-2026-23950", + "source": "NVD", + "severity": "HIGH", + "status": "ACTIVE", + "title": "CVE-2026-23950 - tar", + "reason_to_ignore": "Transitive dependency in jupyterlab staging yarn.lock - not directly exploitable in training container context" + } ] } \ No newline at end of file diff --git a/data/common-safety-ignorelist.json b/data/common-safety-ignorelist.json index 1c9fe53cd26f..deb10dabc1fd 100644 --- a/data/common-safety-ignorelist.json +++ b/data/common-safety-ignorelist.json @@ -1,5 +1,6 @@ { "51358": "Safety is not packaged with container, it is the result of security scanning process", + "85151": "Protobuf CVE in TensorFlow 2.19 - upstream dependency, cannot be patched without TF upgrade", "76769" : "A vulnerability, which was classified as problematic, was found in PyTorch 2.6.0. Which was built with container and cannot be patched.", "76771" : "A vulnerability, which was classified as problematic, was found in Pytorch version 2.5.1 and prior. Which was built with container and cannot be patched.", "78828" : "A DoS vulnerability in MKLDNN pooling implementation affecting PyTorch versions < 2.7.1-rc1. Container is PyTorch 2.6.0 and cannot be upgraded to 2.7.1." diff --git a/dlc_developer_config.toml b/dlc_developer_config.toml index 207784d1f191..c71759d2d3ca 100644 --- a/dlc_developer_config.toml +++ b/dlc_developer_config.toml @@ -37,12 +37,12 @@ deep_canary_mode = false [build] # Add in frameworks you would like to build. By default, builds are disabled unless you specify building an image. # available frameworks - ["base", "vllm", "sglang", "autogluon", "huggingface_vllm", "huggingface_tensorflow", "huggingface_pytorch", "huggingface_tensorflow_trcomp", "huggingface_pytorch_trcomp", "pytorch_trcomp", "tensorflow", "pytorch", "stabilityai_pytorch"] -build_frameworks = [] +build_frameworks = ["tensorflow"] # By default we build both training and inference containers. Set true/false values to determine which to build. build_training = true -build_inference = true +build_inference = false # Set do_build to "false" to skip builds and test the latest image built by this PR # Note: at least one build is required to set do_build to "false" @@ -61,13 +61,13 @@ notify_test_failures = false use_new_test_structure = false ### On by default -sanity_tests = true +sanity_tests = false security_tests = true safety_check_test = false ecr_scan_allowlist_feature = false -ecs_tests = true -eks_tests = true -ec2_tests = true +ecs_tests = false +eks_tests = false +ec2_tests = false # Set it to true if you are preparing a Benchmark related PR ec2_benchmark_tests = false @@ -78,7 +78,7 @@ ec2_benchmark_tests = false ec2_tests_on_heavy_instances = false ### SM specific tests ### On by default -sagemaker_local_tests = true +sagemaker_local_tests = false ### Set enable_ipv6 = true to run tests with IPv6-enabled resources ### Off by default (set to false) enable_ipv6 = false @@ -96,7 +96,7 @@ enable_ipv6 = false ipv6_vpc_name = "" # run standard sagemaker remote tests from test/sagemaker_tests -sagemaker_remote_tests = true +sagemaker_remote_tests = false # run efa sagemaker tests sagemaker_efa_tests = false # run release_candidate_integration tests @@ -125,7 +125,7 @@ dlc-pr-base = "" # Standard Framework Training dlc-pr-pytorch-training = "" -dlc-pr-tensorflow-2-training = "" +dlc-pr-tensorflow-2-training = "tensorflow/training/buildspec-2-19-sm.yml" dlc-pr-autogluon-training = "" # ARM64 Training diff --git a/tensorflow/training/buildspec-2-19-sm.yml b/tensorflow/training/buildspec-2-19-sm.yml index 2e2fef5c0c34..002e4c4f6e55 100644 --- a/tensorflow/training/buildspec-2-19-sm.yml +++ b/tensorflow/training/buildspec-2-19-sm.yml @@ -5,7 +5,7 @@ framework: &FRAMEWORK tensorflow version: &VERSION 2.19.0 short_version: &SHORT_VERSION "2.19" arch_type: x86 -autopatch_build: "True" +autopatch_build: "False" repository_info: training_repository: &TRAINING_REPOSITORY diff --git a/tensorflow/training/docker/2.19/py3/Dockerfile.cpu b/tensorflow/training/docker/2.19/py3/Dockerfile.cpu index 00d9b12a1418..7d6022ff974a 100644 --- a/tensorflow/training/docker/2.19/py3/Dockerfile.cpu +++ b/tensorflow/training/docker/2.19/py3/Dockerfile.cpu @@ -134,11 +134,15 @@ RUN ${PIP} --no-cache-dir install --upgrade \ pip \ setuptools +# Install UV for faster package installation +RUN curl -LsSf https://astral.sh/uv/install.sh | sh +ENV PATH="/root/.local/bin:${PATH}" + # Some TF tools expect a "python" binary RUN ln -s $(which ${PYTHON}) /usr/local/bin/python \ && ln -s $(which ${PIP}) /usr/bin/pip -RUN ${PIP} install --no-cache-dir -U \ +RUN uv pip install --system --no-cache-dir \ pybind11 \ cmake \ scipy \ @@ -148,9 +152,6 @@ RUN ${PIP} install --no-cache-dir -U \ "awscli<2" \ urllib3 \ mpi4py \ - # Let's install TensorFlow separately in the end to avoid - # the library version to be overwritten - && ${PIP} install --no-cache-dir -U \ h5py \ absl-py \ opencv-python \ @@ -189,7 +190,7 @@ RUN curl https://aws-dlc-licenses.s3.amazonaws.com/tensorflow-${TF_VERSION}/lice FROM common AS ec2 ARG TF_URL -RUN ${PIP} install --no-cache-dir -U \ +RUN uv pip install --system --no-cache-dir \ ${TF_URL} \ "tensorflow-io==0.37.*" \ tensorflow-datasets @@ -255,7 +256,7 @@ RUN apt-get update \ # and this is fine since sagemaker is more important than the models and # the models still work on pyyaml 6 in this context. # Need to install wheel before we can fix the pyyaml issue below -RUN pip install --no-cache-dir -U \ +RUN uv pip install --system --no-cache-dir \ wheel \ "cython<3" \ "pyyaml<6" \ @@ -263,42 +264,38 @@ RUN pip install --no-cache-dir -U \ # https://github.com/tensorflow/models/issues/9267 # tf-models does not respect existing installations of TF and always installs open source TF -RUN ${PIP} install \ - --default-timeout=300 \ - --retries 5 \ - --no-cache-dir -U \ +RUN uv pip install --system --no-cache-dir \ "tf-models-official==${TF_VERSION}.1" \ "tensorflow-text==${TF_VERSION}.0" \ && ${PIP} uninstall -y tensorflow tensorflow-gpu \ - && ${PIP} install --no-cache-dir -U \ + && uv pip install --system --no-cache-dir \ ${TF_URL} \ "tensorflow-io==0.37.*" \ - tensorflow-datasets - -# Install rust and cargo -RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y -ENV PATH="/root/.cargo/bin:${PATH}" + tensorflow-datasets \ + && rm -rf /root/.cache/uv /root/.cargo/registry -RUN $PYTHON -m pip install --no-cache-dir -U \ +RUN uv pip install --system --no-cache-dir \ numba \ bokeh \ imageio \ - opencv-python \ + "opencv-python==4.12.0.88" \ plotly \ seaborn \ - shap + "shap<0.50" \ + && rm -rf /root/.cache/uv -RUN $PYTHON -m pip install --no-cache-dir -U \ - sagemaker \ - sagemaker-experiments \ +RUN uv pip install --system --no-cache-dir \ + "sagemaker>=2.256.0,<3" \ + sagemaker-experiments==0.1.45 \ sagemaker-tensorflow-training \ sagemaker-training \ y-py \ sagemaker-studio-analytics-extension \ - "sparkmagic<1" \ + sparkmagic==0.22.0 \ sagemaker-studio-sparkmagic-lib \ - smclarify - + smclarify \ + && rm -rf /root/.cache/uv + # Remove python kernel installed by sparkmagic RUN /usr/local/bin/jupyter-kernelspec remove -f python3 diff --git a/tensorflow/training/docker/2.19/py3/Dockerfile.sagemaker.cpu.core_packages.json b/tensorflow/training/docker/2.19/py3/Dockerfile.sagemaker.cpu.core_packages.json index a85eca7cad14..ad1eb95ada0f 100644 --- a/tensorflow/training/docker/2.19/py3/Dockerfile.sagemaker.cpu.core_packages.json +++ b/tensorflow/training/docker/2.19/py3/Dockerfile.sagemaker.cpu.core_packages.json @@ -12,6 +12,12 @@ "pyyaml": { "version_specifier": ">=6.0,<6.1" }, + "sagemaker": { + "version_specifier": ">=2.256.0,<3" + }, + "sagemaker-experiments": { + "version_specifier": "<1" + }, "sparkmagic": { "version_specifier": "<1" }, diff --git a/tensorflow/training/docker/2.19/py3/Dockerfile.sagemaker.cpu.os_scan_allowlist.json b/tensorflow/training/docker/2.19/py3/Dockerfile.sagemaker.cpu.os_scan_allowlist.json index 9b1c3de9af08..3b17717a0d03 100644 --- a/tensorflow/training/docker/2.19/py3/Dockerfile.sagemaker.cpu.os_scan_allowlist.json +++ b/tensorflow/training/docker/2.19/py3/Dockerfile.sagemaker.cpu.os_scan_allowlist.json @@ -549,37 +549,6 @@ "reason_to_ignore": "N/A" } ], - "jaraco.context": [ - { - "description": "jaraco.context, an open-source software package that provides some useful decorators and context managers, has a Zip Slip path traversal vulnerability in the `jaraco.context.tarball()` function starting in version 5.2.0 and prior to version 6.1.0. The vulnerability may allow attackers to extract files outside the intended extraction directory when malicious tar archives are processed. The strip_first_component filter splits the path on the first `/` and extracts the second component, while allowing `../` sequences. Paths like `dummy_dir/../../etc/passwd` become `../../etc/passwd`. Note that this suffers from a nested tarball attack as well with multi-level tar files such as `dummy_dir/inner.tar.gz`, where the inner.tar.gz includes a traversal `dummy_dir/../../config/.env` that also gets translated to `../../config/.env`. Version 6.1.0 contains a patch for the issue.", - "vulnerability_id": "CVE-2026-23949", - "name": "CVE-2026-23949", - "package_name": "jaraco.context", - "package_details": { - "file_path": "/usr/local/lib/python3.12/site-packages/setuptools/_vendor/jaraco.context-5.3.0.dist-info/METADATA", - "name": "jaraco.context", - "package_manager": "PYTHON", - "version": "5.3.0", - "release": null - }, - "remediation": { - "recommendation": { - "text": "None Provided" - } - }, - "cvss_v3_score": 8.6, - "cvss_v30_score": 0.0, - "cvss_v31_score": 8.6, - "cvss_v2_score": 0.0, - "cvss_v3_severity": "HIGH", - "source_url": "https://nvd.nist.gov/vuln/detail/CVE-2026-23949", - "source": "NVD", - "severity": "HIGH", - "status": "ACTIVE", - "title": "CVE-2026-23949 - jaraco.context", - "reason_to_ignore": "N/A" - } - ], "qs": [ { "description": "Improper Input Validation vulnerability in qs (parse modules) allows HTTP DoS.This issue affects qs: < 6.14.1.\n\nSummaryThe arrayLimit\u00a0option in qs does not enforce limits for bracket notation (a[]=1&a[]=2), allowing attackers to cause denial-of-service via memory exhaustion. Applications using arrayLimit\u00a0for DoS protection are vulnerable.\n\nDetailsThe arrayLimit\u00a0option only checks limits for indexed notation (a[0]=1&a[1]=2) but completely bypasses it for bracket notation (a[]=1&a[]=2).\n\nVulnerable code\u00a0(lib/parse.js:159-162):\n\nif (root === '[]' && options.parseArrays) { obj = utils.combine([], leaf); // No arrayLimit check }\n\nWorking code\u00a0(lib/parse.js:175):\n\nelse if (index <= options.arrayLimit) { // Limit checked here obj = []; obj[index] = leaf; }\n\nThe bracket notation handler at line 159 uses utils.combine([], leaf)\u00a0without validating against options.arrayLimit, while indexed notation at line 175 checks index <= options.arrayLimit\u00a0before creating arrays.\n\nPoCTest 1 - Basic bypass:\n\nnpm install qs\n\nconst qs", diff --git a/tensorflow/training/docker/2.19/py3/cu125/Dockerfile.gpu b/tensorflow/training/docker/2.19/py3/cu125/Dockerfile.gpu index 229bb32a4fd7..f69b9dfcef12 100644 --- a/tensorflow/training/docker/2.19/py3/cu125/Dockerfile.gpu +++ b/tensorflow/training/docker/2.19/py3/cu125/Dockerfile.gpu @@ -193,14 +193,18 @@ RUN wget https://www.python.org/ftp/python/$PYTHON_VERSION/Python-$PYTHON_VERSIO RUN ${PIP} --no-cache-dir install --upgrade \ pip \ - setuptools \ + "setuptools>=81" \ wheel +# Install UV for faster package installation +RUN curl -LsSf https://astral.sh/uv/install.sh | sh +ENV PATH="/root/.local/bin:${PATH}" + # Some TF tools expect a "python" binary RUN ln -s $(which ${PYTHON}) /usr/local/bin/python \ && ln -s $(which ${PIP}) /usr/bin/pip -RUN ${PIP} install --no-cache-dir -U \ +RUN uv pip install --system --no-cache-dir \ pybind11 \ cmake \ scipy \ @@ -269,7 +273,7 @@ FROM common AS ec2 ARG TF_URL -RUN ${PIP} install --no-cache-dir -U \ +RUN uv pip install --system --no-cache-dir \ ${TF_URL} \ "tensorflow-io==0.37.*" \ tensorflow-datasets @@ -323,7 +327,7 @@ ENV SAGEMAKER_TRAINING_MODULE sagemaker_tensorflow_container.training:main # and this is fine since sagemaker is more important than the models and # the models still work on pyyaml 6 in this context. # Need to install wheel before we can fix the pyyaml issue below -RUN pip install --no-cache-dir -U \ +RUN uv pip install --system --no-cache-dir \ wheel \ "cython<3" \ "pyyaml<6" \ @@ -331,45 +335,45 @@ RUN pip install --no-cache-dir -U \ # Pin numpy to 1.26.4 -RUN ${PIP} install --no-cache-dir -U "numpy==1.26.4" +RUN uv pip install --system --no-cache-dir "numpy==1.26.4" # https://github.com/tensorflow/models/issues/9267 # tf-models does not respect existing installations of TF and always installs open source TF -RUN ${PIP} install \ - --default-timeout=300 \ - --retries 5 \ - --no-cache-dir -U \ +RUN uv pip install --system --no-cache-dir \ "tf-models-official==${TF_VERSION}.1" \ "tensorflow-text==${TF_VERSION}.0" \ && ${PIP} uninstall -y tensorflow tensorflow-gpu \ - && ${PIP} install --no-cache-dir -U \ + && uv pip install --system --no-cache-dir \ ${TF_URL} \ "tensorflow-io==0.37.*" \ - tensorflow-datasets - -# Install rust and cargo -RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y -ENV PATH="/root/.cargo/bin:${PATH}" + tensorflow-datasets \ + && rm -rf /root/.cache/uv /root/.cargo/registry -RUN $PYTHON -m pip install --no-cache-dir -U \ +RUN uv pip install --system --no-cache-dir \ numba \ bokeh \ imageio \ "opencv-python==4.11.0.86" \ plotly \ seaborn \ - shap + "shap<0.50" \ + && rm -rf /root/.cache/uv -RUN $PYTHON -m pip install --no-cache-dir -U \ - sagemaker \ - sagemaker-experiments==0.* \ +RUN uv pip install --system --no-cache-dir \ + "sagemaker>=2.256.0,<3" \ + sagemaker-experiments==0.1.45 \ sagemaker-tensorflow-training \ sagemaker-training \ y-py \ sagemaker-studio-analytics-extension \ - "sparkmagic<1" \ + sparkmagic==0.22.0 \ sagemaker-studio-sparkmagic-lib \ - smclarify + smclarify \ + && rm -rf /root/.cache/uv + +# Re-pin numpy after all installs to ensure TF compatibility +# Also re-sync awscli with botocore after sagemaker packages may have changed botocore version +RUN uv pip install --system --no-cache-dir "numpy==1.26.4" "awscli<2" # install boost # tensorflow is compiled with --cxxopt="-D_GLIBCXX_USE_CXX11_ABI=1" diff --git a/tensorflow/training/docker/2.19/py3/cu125/Dockerfile.sagemaker.gpu.core_packages.json b/tensorflow/training/docker/2.19/py3/cu125/Dockerfile.sagemaker.gpu.core_packages.json index ada7fec18f69..6149740559c4 100644 --- a/tensorflow/training/docker/2.19/py3/cu125/Dockerfile.sagemaker.gpu.core_packages.json +++ b/tensorflow/training/docker/2.19/py3/cu125/Dockerfile.sagemaker.gpu.core_packages.json @@ -11,8 +11,11 @@ "pyyaml": { "version_specifier": ">=6.0,<6.1" }, + "sagemaker": { + "version_specifier": ">=2.256.0,<3" + }, "sagemaker-experiments": { - "version_specifier": "==0.*" + "version_specifier": "<1" }, "sparkmagic": { "version_specifier": "<1" diff --git a/tensorflow/training/docker/2.19/py3/cu125/Dockerfile.sagemaker.gpu.os_scan_allowlist.json b/tensorflow/training/docker/2.19/py3/cu125/Dockerfile.sagemaker.gpu.os_scan_allowlist.json index 9b1c3de9af08..3b17717a0d03 100644 --- a/tensorflow/training/docker/2.19/py3/cu125/Dockerfile.sagemaker.gpu.os_scan_allowlist.json +++ b/tensorflow/training/docker/2.19/py3/cu125/Dockerfile.sagemaker.gpu.os_scan_allowlist.json @@ -549,37 +549,6 @@ "reason_to_ignore": "N/A" } ], - "jaraco.context": [ - { - "description": "jaraco.context, an open-source software package that provides some useful decorators and context managers, has a Zip Slip path traversal vulnerability in the `jaraco.context.tarball()` function starting in version 5.2.0 and prior to version 6.1.0. The vulnerability may allow attackers to extract files outside the intended extraction directory when malicious tar archives are processed. The strip_first_component filter splits the path on the first `/` and extracts the second component, while allowing `../` sequences. Paths like `dummy_dir/../../etc/passwd` become `../../etc/passwd`. Note that this suffers from a nested tarball attack as well with multi-level tar files such as `dummy_dir/inner.tar.gz`, where the inner.tar.gz includes a traversal `dummy_dir/../../config/.env` that also gets translated to `../../config/.env`. Version 6.1.0 contains a patch for the issue.", - "vulnerability_id": "CVE-2026-23949", - "name": "CVE-2026-23949", - "package_name": "jaraco.context", - "package_details": { - "file_path": "/usr/local/lib/python3.12/site-packages/setuptools/_vendor/jaraco.context-5.3.0.dist-info/METADATA", - "name": "jaraco.context", - "package_manager": "PYTHON", - "version": "5.3.0", - "release": null - }, - "remediation": { - "recommendation": { - "text": "None Provided" - } - }, - "cvss_v3_score": 8.6, - "cvss_v30_score": 0.0, - "cvss_v31_score": 8.6, - "cvss_v2_score": 0.0, - "cvss_v3_severity": "HIGH", - "source_url": "https://nvd.nist.gov/vuln/detail/CVE-2026-23949", - "source": "NVD", - "severity": "HIGH", - "status": "ACTIVE", - "title": "CVE-2026-23949 - jaraco.context", - "reason_to_ignore": "N/A" - } - ], "qs": [ { "description": "Improper Input Validation vulnerability in qs (parse modules) allows HTTP DoS.This issue affects qs: < 6.14.1.\n\nSummaryThe arrayLimit\u00a0option in qs does not enforce limits for bracket notation (a[]=1&a[]=2), allowing attackers to cause denial-of-service via memory exhaustion. Applications using arrayLimit\u00a0for DoS protection are vulnerable.\n\nDetailsThe arrayLimit\u00a0option only checks limits for indexed notation (a[0]=1&a[1]=2) but completely bypasses it for bracket notation (a[]=1&a[]=2).\n\nVulnerable code\u00a0(lib/parse.js:159-162):\n\nif (root === '[]' && options.parseArrays) { obj = utils.combine([], leaf); // No arrayLimit check }\n\nWorking code\u00a0(lib/parse.js:175):\n\nelse if (index <= options.arrayLimit) { // Limit checked here obj = []; obj[index] = leaf; }\n\nThe bracket notation handler at line 159 uses utils.combine([], leaf)\u00a0without validating against options.arrayLimit, while indexed notation at line 175 checks index <= options.arrayLimit\u00a0before creating arrays.\n\nPoCTest 1 - Basic bypass:\n\nnpm install qs\n\nconst qs", diff --git a/test/requirements.txt b/test/requirements.txt index 0ecb0d3df619..7444d61963db 100644 --- a/test/requirements.txt +++ b/test/requirements.txt @@ -13,6 +13,7 @@ pytest-timeout pytest-json-report pytest-xdist sagemaker>=2,<3 +sagemaker-experiments xmltodict retrying gitpython