Update the dockerfile base image to cuda-dl-base #3775
Triggered via pull request
January 27, 2025 10:24
Status
Cancelled
Total duration
6h 30m 59s
Artifacts
36
ci.yaml
on: pull_request
metadata
0s
Matrix: amd64 / test-distribution
Matrix: arm64 / test-distribution
Matrix: amd64 / test-jax / run-unit-test
amd64
/
...
/
launch-slurm-runner
1h 45m
amd64
/
test-nsys-jax-eks
5m 10s
Matrix: amd64 / test-nsys-jax / run-unit-test
Matrix: arm64 / test-jax / run-unit-test
Waiting for pending jobs
arm64
/
test-nsys-jax-eks
0s
arm64
/
...
/
launch-slurm-runner
Matrix: arm64 / test-nsys-jax / run-unit-test
Waiting for pending jobs
Matrix: amd64 / test-te / run-unit-test
Waiting for pending jobs
Matrix: amd64 / test-upstream-pax / pax-multi-node
Waiting for pending jobs
Matrix: amd64 / test-upstream-pax / single-process-evaluation
Waiting for pending jobs
Matrix: amd64 / test-upstream-pax / single-process-multi-device
Waiting for pending jobs
Matrix: amd64 / test-te-multigpu / te-multi-gpu
Waiting for pending jobs
Matrix: amd64 / test-upstream-t5x / t5x-multi-gpu
Matrix: amd64 / test-upstream-t5x / t5x-multi-node
Matrix: amd64 / test-gemma / run-unit-test
Matrix: amd64 / test-levanter / run-unit-test
Matrix: amd64 / test-maxtext / maxtext-multinode
Matrix: amd64 / test-maxtext / single-process-multi-device
Matrix: amd64 / test-triton / run-unit-test
Matrix: amd64 / test-nsys-jax-archive
Matrix: arm64 / test-te / run-unit-test
Waiting for pending jobs
Matrix: arm64 / test-upstream-pax / pax-multi-node
Waiting for pending jobs
Matrix: arm64 / test-upstream-pax / single-process-evaluation
Waiting for pending jobs
Matrix: arm64 / test-upstream-pax / single-process-multi-device
Waiting for pending jobs
Matrix: arm64 / test-te-multigpu / te-multi-gpu
Waiting for pending jobs
Matrix: arm64 / test-upstream-t5x / t5x-multi-gpu
Waiting for pending jobs
Matrix: arm64 / test-upstream-t5x / t5x-multi-node
Waiting for pending jobs
Matrix: arm64 / test-gemma / run-unit-test
Waiting for pending jobs
Matrix: arm64 / test-levanter / run-unit-test
Waiting for pending jobs
Matrix: arm64 / test-maxtext / maxtext-multinode
Waiting for pending jobs
Matrix: arm64 / test-maxtext / single-process-multi-device
Waiting for pending jobs
Matrix: arm64 / test-triton / run-unit-test
Waiting for pending jobs
Matrix: arm64 / test-nsys-jax-archive
Matrix: amd64 / test-rosetta-pax / rosetta-pax-multi-node-te
Waiting for pending jobs
Matrix: amd64 / test-rosetta-pax / rosetta-pax-multi-node
Waiting for pending jobs
Matrix: amd64 / test-rosetta-pax / rosetta-pax-single-node-dropout-te
Waiting for pending jobs
Matrix: amd64 / test-rosetta-pax / single-process-evaluation-te
Waiting for pending jobs
Matrix: amd64 / test-rosetta-pax / single-process-multi-device-te
Waiting for pending jobs
Matrix: amd64 / test-rosetta-t5x / multi-gpu-multi-node
Matrix: amd64 / test-rosetta-t5x / single-process-multi-device
Matrix: amd64 / test-rosetta-t5x / vit-multi-gpu-multi-node
Matrix: amd64 / test-rosetta-t5x / vit-single-process-multi-device
Matrix: arm64 / test-rosetta-pax / rosetta-pax-multi-node-te
Waiting for pending jobs
Matrix: arm64 / test-rosetta-pax / rosetta-pax-multi-node
Waiting for pending jobs
Matrix: arm64 / test-rosetta-pax / rosetta-pax-single-node-dropout-te
Waiting for pending jobs
Matrix: arm64 / test-rosetta-pax / single-process-evaluation-te
Waiting for pending jobs
Matrix: arm64 / test-rosetta-pax / single-process-multi-device-te
Waiting for pending jobs
Matrix: arm64 / test-rosetta-t5x / multi-gpu-multi-node
Waiting for pending jobs
Matrix: arm64 / test-rosetta-t5x / single-process-multi-device
Waiting for pending jobs
Matrix: arm64 / test-rosetta-t5x / vit-multi-gpu-multi-node
Waiting for pending jobs
Matrix: arm64 / test-rosetta-t5x / vit-single-process-multi-device
Waiting for pending jobs
Matrix: publish-containers
Waiting for pending jobs
finalize
/
publish-badge
Annotations
12 errors
|
amd64 / build-upstream-pax / build-upstream-pax
buildx failed with: ERROR: failed to solve: process "/bin/sh -c <<\"EOF\" bash -exu -o pipefail\ngit-clone.sh ${URLREF_LINGVO} ${SRC_PATH_LINGVO}\npushd ${SRC_PATH_LINGVO}\n\nCPU_ARCH=\"$(dpkg --print-architecture)\"\nif [[ \"${CPU_ARCH}\" == \"arm64\" ]]; then\n\n# Use aarch distribution of protobufs\npatch -p1 <<\"EOFINNER\"\ndiff --git a/lingvo/repo.bzl b/lingvo/repo.bzl\nindex ce65822d2..d9c0277aa 100644\n--- a/lingvo/repo.bzl\n+++ b/lingvo/repo.bzl\n@@ -232,9 +232,9 @@ filegroup(\n )\n \"\"\",\n urls = [\n- \"https://github.com/protocolbuffers/protobuf/releases/download/v21.9/protoc-21.9-linux-x86_64.zip\",\n+ \"https://github.com/protocolbuffers/protobuf/releases/download/v21.9/protoc-21.9-linux-aarch_64.zip\",\n ],\n- sha256 = \"3cd951aff8ce713b94cde55e12378f505f2b89d47bf080508cf77e3934f680b6\",\n+ sha256 = \"a584286dfa8ebb17032ece206ed74d5e9931e2edb9016e427be2a0dab3b21071\",\n )\n\n def icu():\nEOFINNER\n\nfi\n\npip install tensorflow_datasets==4.9.2 auditwheel tensorflow==2.18.0\nfor pattern in \\\n \"s|tensorflow=|#tensorflow=|g\" \\\n \"s|dataclasses=|#dataclasses=|g\" \\\n \"s|==.*||g\" \\\n; do\n sed -i \"${pattern}\" ${SRC_PATH_LINGVO}/docker/dev.requirements.txt\ndone\n# Lingvo support only python < 3.12, so we hack it and update dependencies\n# to be able to build for py-3.12\nfor pattern in \\\n \"s|tensorflow-text~=2.13.0|tensorflow-text~=2.18.1|g\" \\\n \"s|tensorflow~=2.13.0|tensorflow~=2.18.0|g\" \\\n \"s|python_requires='>=3.8,<3.11'|python_requires='>=3.8,<3.13'|\" \\\n; do\n sed -i \"${pattern}\" ${SRC_PATH_LINGVO}/pip_package/setup.py;\ndone\npip install -r docker/dev.requirements.txt\n\n# Some tests are flaky right now, so we skip running the tests.\nBUILD_ARCH=\"x86_64\"\nif [[ \"$CPU_ARCH\" == \"arm64\" ]]; then\n BUILD_ARCH=\"aarch64\";\nfi\nsed -i 's/manylinux2014_x86_64/manylinux_2_38_'\"${BUILD_ARCH}\"'/' pip_package/build.sh\nSKIP_TESTS=1 PYTHON_MINOR_VERSION=$(python --version | cut -d ' ' -f 2 | cut -d '.' 
-f 2) pip_package/build.sh\nEOF" did not complete successfully: exit code: 1
|
|
arm64 / build-upstream-pax / build-upstream-pax
buildx failed with: ERROR: failed to solve: process "/bin/sh -c <<\"EOF\" bash -exu -o pipefail\ngit-clone.sh ${URLREF_LINGVO} ${SRC_PATH_LINGVO}\npushd ${SRC_PATH_LINGVO}\n\nCPU_ARCH=\"$(dpkg --print-architecture)\"\nif [[ \"${CPU_ARCH}\" == \"arm64\" ]]; then\n\n# Use aarch distribution of protobufs\npatch -p1 <<\"EOFINNER\"\ndiff --git a/lingvo/repo.bzl b/lingvo/repo.bzl\nindex ce65822d2..d9c0277aa 100644\n--- a/lingvo/repo.bzl\n+++ b/lingvo/repo.bzl\n@@ -232,9 +232,9 @@ filegroup(\n )\n \"\"\",\n urls = [\n- \"https://github.com/protocolbuffers/protobuf/releases/download/v21.9/protoc-21.9-linux-x86_64.zip\",\n+ \"https://github.com/protocolbuffers/protobuf/releases/download/v21.9/protoc-21.9-linux-aarch_64.zip\",\n ],\n- sha256 = \"3cd951aff8ce713b94cde55e12378f505f2b89d47bf080508cf77e3934f680b6\",\n+ sha256 = \"a584286dfa8ebb17032ece206ed74d5e9931e2edb9016e427be2a0dab3b21071\",\n )\n\n def icu():\nEOFINNER\n\nfi\n\npip install tensorflow_datasets==4.9.2 auditwheel tensorflow==2.18.0\nfor pattern in \\\n \"s|tensorflow=|#tensorflow=|g\" \\\n \"s|dataclasses=|#dataclasses=|g\" \\\n \"s|==.*||g\" \\\n; do\n sed -i \"${pattern}\" ${SRC_PATH_LINGVO}/docker/dev.requirements.txt\ndone\n# Lingvo support only python < 3.12, so we hack it and update dependencies\n# to be able to build for py-3.12\nfor pattern in \\\n \"s|tensorflow-text~=2.13.0|tensorflow-text~=2.18.1|g\" \\\n \"s|tensorflow~=2.13.0|tensorflow~=2.18.0|g\" \\\n \"s|python_requires='>=3.8,<3.11'|python_requires='>=3.8,<3.13'|\" \\\n; do\n sed -i \"${pattern}\" ${SRC_PATH_LINGVO}/pip_package/setup.py;\ndone\npip install -r docker/dev.requirements.txt\n\n# Some tests are flaky right now, so we skip running the tests.\nBUILD_ARCH=\"x86_64\"\nif [[ \"$CPU_ARCH\" == \"arm64\" ]]; then\n BUILD_ARCH=\"aarch64\";\nfi\nsed -i 's/manylinux2014_x86_64/manylinux_2_38_'\"${BUILD_ARCH}\"'/' pip_package/build.sh\nSKIP_TESTS=1 PYTHON_MINOR_VERSION=$(python --version | cut -d ' ' -f 2 | cut -d '.' 
-f 2) pip_package/build.sh\nEOF" did not complete successfully: exit code: 1
|
|
amd64 / test-nsys-jax / nsys-jax-A100-unit-test
Process completed with exit code 4.
|
|
amd64 / test-triton / triton-A100-unit-test
Process completed with exit code 1.
|
|
amd64 / test-rosetta-t5x / vit-multi-gpu-multi-node (1, 2)
The run was canceled by @olupton.
|
|
amd64 / test-rosetta-t5x / vit-multi-gpu-multi-node (1, 2)
The operation was canceled.
|
|
amd64 / test-maxtext / maxtext-multinode (1, 4, 2, 2)
The run was canceled by @olupton.
|
|
amd64 / test-maxtext / maxtext-multinode (1, 4, 2, 2)
The operation was canceled.
|
|
amd64 / test-upstream-t5x / t5x-multi-node (8G2N, 8, 2)
The run was canceled by @olupton.
|
|
amd64 / test-upstream-t5x / t5x-multi-node (8G2N, 8, 2)
The operation was canceled.
|
|
|
|
|
Artifacts
Produced during runtime
| Name | Size | Digest | |
|---|---|---|---|
| artifact-base-build-amd64 (Expired) | 567 Bytes | sha256:20136dae60bf543d9e5e4a2f07ab314078cba6ea0f52cc9979f30f33caf4e13d | |
| artifact-base-build-arm64 (Expired) | 567 Bytes | sha256:b19ed3b5254d2fb201e98427964c9a854bfadee5b8e40cc59c508b2294772b2f | |
| artifact-equinox-build-amd64 (Expired) | 569 Bytes | sha256:100091b21871ff82f674a6c9ccd233ba107f8a25ff4f587ea8def1494cf0c56e | |
| artifact-equinox-build-arm64 (Expired) | 568 Bytes | sha256:1fda8af927c8cf41c79b1468498fe7ca04fb87823bdffc477a916de52c55aa1d | |
| artifact-gemma-build-amd64 (Expired) | 560 Bytes | sha256:e2826437bbd8e93290ee7127f107d40f6e60b6016c26b5d2fcd2b6c6f8d7fcbb | |
| artifact-jax-build-amd64 (Expired) | 555 Bytes | sha256:b9817008e492c844fd82be004c1e419989e0ca442726a66e18174f289a60f781 | |
| artifact-jax-build-arm64 (Expired) | 552 Bytes | sha256:2882ad7da424c94ba71d3e827a55aa5891f680ffc8f92d8070897c8af56022cf | |
| artifact-levanter-build-amd64 (Expired) | 572 Bytes | sha256:53ed6a5aeab3106e6134d6896809e34ae3346e578634da6d1aba108257d5489a | |
| artifact-levanter-build-arm64 (Expired) | 572 Bytes | sha256:cc4e61707829a9a4aae85786ecd69a9459ec9de40ae826305b79408a488bb1d2 | |
| artifact-maxtext-build-amd64 (Expired) | 569 Bytes | sha256:d02f9ddab0989de2bc9067512db1ad4ad87af061de4a46541f765baf012fa8a5 | |
| artifact-maxtext-build-arm64 (Expired) | 568 Bytes | sha256:53de23836d51840149df931f5d55a87b2cd7cd92fcae625c4a7310680b968828 | |
| artifact-pax-build-amd64 (Expired) | 472 Bytes | sha256:2d854eae6fb22f2b7056ad6af0271660c97fa1e1a2925205d8dec9b4ba13c261 | |
| artifact-pax-build-arm64 (Expired) | 471 Bytes | sha256:84d7b78229312b6aba6236f3eb229d56ed96db6c2316af2c53405eec5579b871 | |
| artifact-rosetta-build-t5x-amd64 (Expired) | 583 Bytes | sha256:f6cc249ecb35ddeffbf7af840246c3fe030e32f1a5e55889da62427156653f99 | |
| artifact-rosetta-build-t5x-arm64 (Expired) | 585 Bytes | sha256:8fd68cc12e19d4f9a287964dc1369a46755eff076fc9e975c3590b85ad199cc2 | |
| artifact-t5x-build-amd64 (Expired) | 568 Bytes | sha256:4f529eb739939c8641b2379e6c175ca0e6470761c5165f77b0f1e5f6a0a30325 | |
| artifact-t5x-build-arm64 (Expired) | 569 Bytes | sha256:c77ca0e508a1e0d2591a3b976e7efb50ffd126db3d7d54d52b4de8636b4ec87f | |
| artifact-triton-build-amd64 (Expired) | 565 Bytes | sha256:d5e58bfb8e4abf137e7bdc1b622240160f0155224797bf31039a5e26fe9da508 | |
| bumped-manifest (Expired) | 82.1 KB | sha256:7fe222e8a86585d172e5e61040eb80205e8439a19255aa126c0b9ffabc5295fc | |
| gemma-unit-test-A100 (Expired) | 5.15 KB | sha256:1e1ae6aaccf8afbcd4642a24a343f0fd1226dace705bb40fe82c86d0157f0e59 | |
| jax-unit-test-A100 (Expired) | 20.7 KB | sha256:e5db9cc4d60522ff1a32c5001434a0f0d68d61cebfefdf5f56ee279bd044f8a7 | |
| levanter-unit-test-A100 (Expired) | 15 KB | sha256:d84bffc417f9afdbda8d033e64d30b18fcfcdc823e6d2c45d807ff780a35b30a | |
| nsys-jax-unit-test-A100 (Expired) | 32.3 MB | sha256:f1ca53d894b780de1ff67eca5be868386dc660db60e9dab312932a3f2605b3df | |
| rosetta-t5x-12986857460-1P1G_te-0 (Expired) | 5.63 MB | sha256:9ff6476c2acb64f4b9ae5400a8133848894b3f657c0e6c7e67b4af3369d0f231 | |
| rosetta-t5x-12986857460-1P8G_te-1 (Expired) | 5.26 MB | sha256:b38042063d9776d1363bc21723ceaf702868fe050bcacaaff3eb2f92eab6aea9 | |
| rosetta-t5x-vit-12986857460-VIT1G1N (Expired) | 21.6 KB | sha256:0556fd3c640c64ffbdfb2dcdc8c43f2bf65ab9929d8de6d7768aabf5fa4b3bfd | |
| rosetta-t5x-vit-12986857460-VIT1P8G (Expired) | 21.9 KB | sha256:5ed39d4a87ae9784381dabb85f1441cf98504dbecc65a61f64d113af9b2124ef | |
| triton-unit-test-A100 (Expired) | 3.12 KB | sha256:346d7307c87dbacca2d27daad46b2409b5d5f18f3604e7b4dccda5fb5bab858a | |
| upstream-maxtext-12986857460-1DP1FSDP1TP1PP (Expired) | 16.1 KB | sha256:77f33dbdf3a9a07f270596e22492e79f429bd0f76bc5f87a18a2745005cec36b | |
| upstream-maxtext-12986857460-1DP1FSDP8TP1PP (Expired) | 22.6 KB | sha256:b0e6e6f6c79c0aec08ea765bbdab9fe8f20416eb1f0d74e3d2186ad82f7c1ca5 | |
| upstream-maxtext-12986857460-1DP2FSDP4TP1PP_single_process (Expired) | 16.4 KB | sha256:e30991798993cf201555a2ad2a463400c8b456ed228b98497aa910f44ccaa5ab | |
| upstream-maxtext-12986857460-1DP4FSDP2TP1PP (Expired) | 21.9 KB | sha256:3fe788a71e11516cf5a464f4cdd4f4b83f06be3c14c18dfb02b8cdc29c81485c | |
| upstream-maxtext-12986857460-1DP8FSDP1TP1PP (Expired) | 21.8 KB | sha256:9dd8afa14980d0d24518562c9f44953106a85c4bae3d545ab9358e2a150e6a74 | |
| upstream-maxtext-12986857460-2DP2FSDP2TP1PP (Expired) | 22 KB | sha256:a2ca7f9abfa77248d38a50aca820f3d7957b5b771b79e5e351ddb34e97b47398 | |
| upstream-t5x-12986857460-1P2G_fmha (Expired) | 6.14 MB | sha256:bf24be6cc76f8a9b3f9ebc1657cfbeef5c40f9c30c0e5b7cbc70cc8be103bea2 | |
| upstream-t5x-12986857460-1P8G (Expired) | 6.4 MB | sha256:ef1f728857ae2f417fbef172e91bc78bdb58fc9883d8c886b22618972984434d | |