Remove deprecated flag xla_gpu_enable_triton_softmax_fusion. #3712
ci.yaml
on: pull_request
metadata
0s
Matrix: amd64 / test-distribution
Matrix: arm64 / test-distribution
Matrix: amd64 / test-jax / run-unit-test
amd64
/
...
/
launch-slurm-runner
2h 25m
amd64
/
test-nsys-jax-eks
5m 15s
Matrix: amd64 / test-nsys-jax / run-unit-test
Matrix: arm64 / test-jax / run-unit-test
Waiting for pending jobs
arm64
/
test-nsys-jax-eks
0s
arm64
/
...
/
launch-slurm-runner
Matrix: arm64 / test-nsys-jax / run-unit-test
Waiting for pending jobs
Matrix: amd64 / test-te / run-unit-test
Waiting for pending jobs
Matrix: amd64 / test-upstream-pax / pax-multi-node
Waiting for pending jobs
Matrix: amd64 / test-upstream-pax / single-process-evaluation
Waiting for pending jobs
Matrix: amd64 / test-upstream-pax / single-process-multi-device
Waiting for pending jobs
Matrix: amd64 / test-te-multigpu / te-multi-gpu
Waiting for pending jobs
Matrix: amd64 / test-upstream-t5x / t5x-multi-gpu
Matrix: amd64 / test-upstream-t5x / t5x-multi-node
Matrix: amd64 / test-gemma / run-unit-test
Matrix: amd64 / test-levanter / run-unit-test
Matrix: amd64 / test-maxtext / maxtext-multinode
Matrix: amd64 / test-maxtext / single-process-multi-device
Matrix: amd64 / test-triton / run-unit-test
Matrix: amd64 / test-nsys-jax-archive
Matrix: arm64 / test-te / run-unit-test
Waiting for pending jobs
Matrix: arm64 / test-upstream-pax / pax-multi-node
Waiting for pending jobs
Matrix: arm64 / test-upstream-pax / single-process-evaluation
Waiting for pending jobs
Matrix: arm64 / test-upstream-pax / single-process-multi-device
Waiting for pending jobs
Matrix: arm64 / test-te-multigpu / te-multi-gpu
Waiting for pending jobs
Matrix: arm64 / test-upstream-t5x / t5x-multi-gpu
Waiting for pending jobs
Matrix: arm64 / test-upstream-t5x / t5x-multi-node
Waiting for pending jobs
Matrix: arm64 / test-gemma / run-unit-test
Waiting for pending jobs
Matrix: arm64 / test-levanter / run-unit-test
Waiting for pending jobs
Matrix: arm64 / test-maxtext / maxtext-multinode
Waiting for pending jobs
Matrix: arm64 / test-maxtext / single-process-multi-device
Waiting for pending jobs
Matrix: arm64 / test-triton / run-unit-test
Waiting for pending jobs
Matrix: arm64 / test-nsys-jax-archive
Matrix: amd64 / test-rosetta-pax / rosetta-pax-multi-node-te
Waiting for pending jobs
Matrix: amd64 / test-rosetta-pax / rosetta-pax-multi-node
Waiting for pending jobs
Matrix: amd64 / test-rosetta-pax / rosetta-pax-single-node-dropout-te
Waiting for pending jobs
Matrix: amd64 / test-rosetta-pax / single-process-evaluation-te
Waiting for pending jobs
Matrix: amd64 / test-rosetta-pax / single-process-multi-device-te
Waiting for pending jobs
Matrix: amd64 / test-rosetta-t5x / multi-gpu-multi-node
Matrix: amd64 / test-rosetta-t5x / single-process-multi-device
Matrix: amd64 / test-rosetta-t5x / vit-multi-gpu-multi-node
Matrix: amd64 / test-rosetta-t5x / vit-single-process-multi-device
Matrix: arm64 / test-rosetta-pax / rosetta-pax-multi-node-te
Waiting for pending jobs
Matrix: arm64 / test-rosetta-pax / rosetta-pax-multi-node
Waiting for pending jobs
Matrix: arm64 / test-rosetta-pax / rosetta-pax-single-node-dropout-te
Waiting for pending jobs
Matrix: arm64 / test-rosetta-pax / single-process-evaluation-te
Waiting for pending jobs
Matrix: arm64 / test-rosetta-pax / single-process-multi-device-te
Waiting for pending jobs
Matrix: arm64 / test-rosetta-t5x / multi-gpu-multi-node
Waiting for pending jobs
Matrix: arm64 / test-rosetta-t5x / single-process-multi-device
Waiting for pending jobs
Matrix: arm64 / test-rosetta-t5x / vit-multi-gpu-multi-node
Waiting for pending jobs
Matrix: arm64 / test-rosetta-t5x / vit-single-process-multi-device
Waiting for pending jobs
Matrix: publish-containers
finalize
/
publish-badge
3s
Annotations
22 errors
|
amd64 / test-triton / triton-V100-unit-test
Process completed with exit code 1.
|
|
amd64 / test-triton / triton-A100-unit-test
Process completed with exit code 1.
|
|
arm64 / build-upstream-pax / build-upstream-pax
buildx failed with: ERROR: failed to solve: process "/bin/sh -c <<\"EOF\" bash -exu -o pipefail\ngit-clone.sh ${URLREF_LINGVO} ${SRC_PATH_LINGVO}\npushd ${SRC_PATH_LINGVO}\n\nCPU_ARCH=\"$(dpkg --print-architecture)\"\nif [[ \"${CPU_ARCH}\" == \"arm64\" ]]; then\n\n# Use aarch distribution of protobufs\npatch -p1 <<\"EOFINNER\"\ndiff --git a/lingvo/repo.bzl b/lingvo/repo.bzl\nindex ce65822d2..d9c0277aa 100644\n--- a/lingvo/repo.bzl\n+++ b/lingvo/repo.bzl\n@@ -232,9 +232,9 @@ filegroup(\n )\n \"\"\",\n urls = [\n- \"https://github.com/protocolbuffers/protobuf/releases/download/v21.9/protoc-21.9-linux-x86_64.zip\",\n+ \"https://github.com/protocolbuffers/protobuf/releases/download/v21.9/protoc-21.9-linux-aarch_64.zip\",\n ],\n- sha256 = \"3cd951aff8ce713b94cde55e12378f505f2b89d47bf080508cf77e3934f680b6\",\n+ sha256 = \"a584286dfa8ebb17032ece206ed74d5e9931e2edb9016e427be2a0dab3b21071\",\n )\n\n def icu():\nEOFINNER\n\nfi\n\npip install tensorflow_datasets==4.9.2 auditwheel tensorflow==2.18.0 /opt/tensorflow_text*.whl\nfor pattern in \\\n \"s|tensorflow=|#tensorflow=|g\" \\\n \"s|tensorflow-text=|#tensorflow-text=|g\" \\\n \"s|dataclasses=|#dataclasses=|g\" \\\n \"s|==.*||g\" \\\n; do\n sed -i \"${pattern}\" ${SRC_PATH_LINGVO}/docker/dev.requirements.txt\ndone\n# Lingvo support only python < 3.12, so we hack it and update dependencies\n# to be able to build for py-3.12\nfor pattern in \\\n \"s|tensorflow-text~=2.13.0|tensorflow-text~=2.18.0|g\" \\\n \"s|tensorflow~=2.13.0|tensorflow~=2.18.0|g\" \\\n \"s|python_requires='>=3.8,<3.11'|python_requires='>=3.8,<3.13'|\" \\\n; do\n sed -i \"${pattern}\" ${SRC_PATH_LINGVO}/pip_package/setup.py;\ndone\npip install -r docker/dev.requirements.txt\n\n# Some tests are flaky right now, so we skip running the tests.\nBUILD_ARCH=\"x86_64\"\nif [[ \"$CPU_ARCH\" == \"arm64\" ]]; then\n BUILD_ARCH=\"aarch64\";\nfi\nsed -i 's/manylinux2014_x86_64/manylinux_2_38_'\"${BUILD_ARCH}\"'/' pip_package/build.sh\nSKIP_TESTS=1 PYTHON_MINOR_VERSION=$(python --version | cut -d ' ' -f 2 | cut -d '.' -f 2) pip_package/build.sh\nEOF" did not complete successfully: exit code: 1
|
|
amd64 / build-upstream-pax / build-upstream-pax
buildx failed with: ERROR: failed to solve: process "/bin/sh -c <<\"EOF\" bash -exu -o pipefail\ngit-clone.sh ${URLREF_LINGVO} ${SRC_PATH_LINGVO}\npushd ${SRC_PATH_LINGVO}\n\nCPU_ARCH=\"$(dpkg --print-architecture)\"\nif [[ \"${CPU_ARCH}\" == \"arm64\" ]]; then\n\n# Use aarch distribution of protobufs\npatch -p1 <<\"EOFINNER\"\ndiff --git a/lingvo/repo.bzl b/lingvo/repo.bzl\nindex ce65822d2..d9c0277aa 100644\n--- a/lingvo/repo.bzl\n+++ b/lingvo/repo.bzl\n@@ -232,9 +232,9 @@ filegroup(\n )\n \"\"\",\n urls = [\n- \"https://github.com/protocolbuffers/protobuf/releases/download/v21.9/protoc-21.9-linux-x86_64.zip\",\n+ \"https://github.com/protocolbuffers/protobuf/releases/download/v21.9/protoc-21.9-linux-aarch_64.zip\",\n ],\n- sha256 = \"3cd951aff8ce713b94cde55e12378f505f2b89d47bf080508cf77e3934f680b6\",\n+ sha256 = \"a584286dfa8ebb17032ece206ed74d5e9931e2edb9016e427be2a0dab3b21071\",\n )\n\n def icu():\nEOFINNER\n\nfi\n\npip install tensorflow_datasets==4.9.2 auditwheel tensorflow==2.18.0 /opt/tensorflow_text*.whl\nfor pattern in \\\n \"s|tensorflow=|#tensorflow=|g\" \\\n \"s|tensorflow-text=|#tensorflow-text=|g\" \\\n \"s|dataclasses=|#dataclasses=|g\" \\\n \"s|==.*||g\" \\\n; do\n sed -i \"${pattern}\" ${SRC_PATH_LINGVO}/docker/dev.requirements.txt\ndone\n# Lingvo support only python < 3.12, so we hack it and update dependencies\n# to be able to build for py-3.12\nfor pattern in \\\n \"s|tensorflow-text~=2.13.0|tensorflow-text~=2.18.0|g\" \\\n \"s|tensorflow~=2.13.0|tensorflow~=2.18.0|g\" \\\n \"s|python_requires='>=3.8,<3.11'|python_requires='>=3.8,<3.13'|\" \\\n; do\n sed -i \"${pattern}\" ${SRC_PATH_LINGVO}/pip_package/setup.py;\ndone\npip install -r docker/dev.requirements.txt\n\n# Some tests are flaky right now, so we skip running the tests.\nBUILD_ARCH=\"x86_64\"\nif [[ \"$CPU_ARCH\" == \"arm64\" ]]; then\n BUILD_ARCH=\"aarch64\";\nfi\nsed -i 's/manylinux2014_x86_64/manylinux_2_38_'\"${BUILD_ARCH}\"'/' pip_package/build.sh\nSKIP_TESTS=1 PYTHON_MINOR_VERSION=$(python --version | cut -d ' ' -f 2 | cut -d '.' -f 2) pip_package/build.sh\nEOF" did not complete successfully: exit code: 1
|
|
amd64 / test-upstream-t5x / t5x-multi-node (8G2N, 8, 2)
The job running on runner jumpbox-vc69x-br4m5 has exceeded the maximum execution time of 360 minutes.
|
|
amd64 / test-upstream-t5x / t5x-multi-node (8G2N, 8, 2)
The operation was canceled.
|
|
amd64 / test-rosetta-t5x / multi-gpu-multi-node (2N8G-te-1, 8, 2, --gin.train/utils.DatasetConfig.pack=False --gin.train_eva...
The job running on runner jumpbox-vc69x-bmlml has exceeded the maximum execution time of 360 minutes.
|
|
|
|
amd64 / test-maxtext / maxtext-multinode (1, 4, 2, 2)
The job running on runner jumpbox-vc69x-q6fjn has exceeded the maximum execution time of 360 minutes.
|
|
amd64 / test-maxtext / maxtext-multinode (1, 4, 2, 2)
The operation was canceled.
|
|
amd64 / test-maxtext / test-maxtext-outcome
Process completed with exit code 1.
|
|
amd64 / test-rosetta-t5x / vit-multi-gpu-multi-node (1, 2)
The job running on runner jumpbox-vc69x-fv9t7 has exceeded the maximum execution time of 360 minutes.
|
|
amd64 / test-rosetta-t5x / vit-multi-gpu-multi-node (1, 2)
The operation was canceled.
|
|
amd64 / test-upstream-t5x / t5x-multi-node (8G2N_fmha, 8, 2, --enable-fmha 1)
The job running on runner jumpbox-vc69x-9hvh7 has exceeded the maximum execution time of 360 minutes.
|
|
amd64 / test-upstream-t5x / t5x-multi-node (8G2N_fmha, 8, 2, --enable-fmha 1)
The operation was canceled.
|
|
amd64 / test-rosetta-t5x / multi-gpu-multi-node (2N2G_te-0, 2, 2, --enable-te 0)
The job running on runner jumpbox-vc69x-7vh7h has exceeded the maximum execution time of 360 minutes.
|
|
amd64 / test-rosetta-t5x / multi-gpu-multi-node (2N2G_te-0, 2, 2, --enable-te 0)
The operation was canceled.
|
|
amd64 / test-upstream-t5x / test-upstream-t5x-outcome
Process completed with exit code 1.
|
|
amd64 / test-rosetta-t5x / vit-multi-gpu-multi-node (8, 2)
The job running on runner jumpbox-vc69x-n2rhn has exceeded the maximum execution time of 360 minutes.
|
|
amd64 / test-rosetta-t5x / vit-multi-gpu-multi-node (8, 2)
The operation was canceled.
|
|
amd64 / test-rosetta-t5x / test-t5x-rosetta-outcome
Process completed with exit code 1.
|
|
finalize / report
Process completed with exit code 5.
|
Artifacts
Produced during runtime
| Name | Size | Digest | |
|---|---|---|---|
|
artifact-maxtext-test
Expired
|
654 Bytes |
sha256:9c95b5426419fdc419610e855c18b9497808ca5e9450b6fc66d03b1b181ac8c5
|
|
|
artifact-pax-build-amd64
Expired
|
472 Bytes |
sha256:b7a307064ad8fa22cf0b465a33a1168441ddab79765a68f5d08f0c3879bc16fe
|
|
|
artifact-pax-build-arm64
Expired
|
471 Bytes |
sha256:14e8d3fe27d353a851699aeb98abbc8bff33fc58de472f130696ccffb5504eac
|
|
|
artifact-rosetta-t5x-mgmn-test
Expired
|
658 Bytes |
sha256:b3b8eb1d1466e484c2ed3aa36949107dad659a493a595767d439dda1daecc6d8
|
|
|
artifact-upstream-t5x-mgmn-test
Expired
|
634 Bytes |
sha256:bc7f4d0a832fee0a20c84ac6aba18ea113d131cf2c3608f93d812e82151539c6
|
|
|
artifact-workflow-metadata
Expired
|
277 Bytes |
sha256:e86cba5bb54cb8abef9222aeb658757284dec5e9ee98a3e2922ffb7addcba112
|
|
|
final-base
Expired
|
193 Bytes |
sha256:1550ed72d2ab3fe214f849086834d1beca10ff50a97f62f215847f82646b35c0
|
|
|
final-equinox
Expired
|
202 Bytes |
sha256:c996d180c7621f9c368de14eda3ea2f9ab44fa4518271b7cd2d84bb135e512b7
|
|
|
final-gemma
Expired
|
196 Bytes |
sha256:8b5e56c84765b47dcc3ba96e2a4a0c9424b3c6a9a7f30468d10d9b8d3ae4cdf4
|
|
|
final-jax
Expired
|
190 Bytes |
sha256:b0e4fb16c1d80cbd262908c265323291b8625260c4c31ee6505cde2060f115ec
|
|
|
final-levanter
Expired
|
205 Bytes |
sha256:fa3f3266a49ddfa44179b613581ebdfeb9ab235107e765ce22ef924c7c328041
|
|
|
final-maxtext
Expired
|
202 Bytes |
sha256:9a26fceeb371ae69be332cbce96e13e55e2697363275d391bb596b3ce669c748
|
|
|
final-t5x
Expired
|
190 Bytes |
sha256:798e51b93b9cf82425e9e493133332ff79b2dc5bce8e12eddc1c6c496835aeab
|
|
|
final-triton
Expired
|
199 Bytes |
sha256:f655c5ac58fc65230f9a8293c5f541c591f3409997baec782310eecdea6428b5
|
|
|
final-upstream-t5x
Expired
|
217 Bytes |
sha256:c22472f4c8e8d4166c85863fa525dd8691abef3bad4f4b13dcb2b207eeccedae
|
|
|
mealkit-equinox
Expired
|
214 Bytes |
sha256:3a8f9f6b28722b0f5f547e8506510ae8289759c379bdcfd0eba26fd67ebe6d9e
|
|
|
mealkit-gemma
Expired
|
208 Bytes |
sha256:c18fef78b9694d1212dd9ab8b54e5404c94ba8bc08cc9a6f9d26fc156a97a7eb
|
|
|
mealkit-jax
Expired
|
199 Bytes |
sha256:30450abbdfab6bfc2306ced65bd036d8034f2b5ad5465cc556dd594d8686726d
|
|
|
mealkit-levanter
Expired
|
217 Bytes |
sha256:4521634eed3122175cb0c9aa01f4273508b469bef7da7da635204773109861ab
|
|
|
mealkit-maxtext
Expired
|
214 Bytes |
sha256:0996731b98501c355c23125bde97a7551b8fbe56a94d7b35a7032b5495ca37cd
|
|
|
mealkit-t5x
Expired
|
202 Bytes |
sha256:2c01aad3b1cce4a0cba8a8cc4fcb2858c7a0d2aee4cbc8d270490784f5012a6f
|
|
|
mealkit-triton
Expired
|
211 Bytes |
sha256:452de74ac30d776f7ddbd814e2d4488f7ec63a399dd27e1c75c091cc5332ab28
|
|
|
mealkit-upstream-t5x
Expired
|
229 Bytes |
sha256:e96e4202590aee840427c1861b154ad9f33532b0c007debdcb7dd970f985cd74
|
|