33ARG BASE_IMAGE=ghcr.io/nvidia/jax-mealkit:jax
44ARG URLREF_PAXML=https://github.com/google/paxml.git#main
55ARG URLREF_PRAXIS=https://github.com/google/praxis.git#main
6- ARG URLREF_TFTEXT=https://github.com/tensorflow/text.git#v2.13.0
6+ ARG URLREF_TFTEXT=https://github.com/tensorflow/text.git#master
77ARG URLREF_LINGVO=https://github.com/tensorflow/lingvo.git#master
88ARG SRC_PATH_PAXML=/opt/paxml
99ARG SRC_PATH_PRAXIS=/opt/praxis
@@ -21,18 +21,19 @@ FROM ${BASE_IMAGE} as wheel-builder
2121# build tensorflow-text from source
2222#------------------------------------------------------------------------------
2323
24+ # Remove TFTEXT build from source when it has py-3.12 wheels for x86/arm64
2425FROM wheel-builder as tftext-builder
2526ARG URLREF_TFTEXT
2627ARG SRC_PATH_TFTEXT
2728RUN <<"EOF" bash -exu -o pipefail
28- pip install tensorflow_datasets==4.9.2 auditwheel tensorflow==2.13 .0
29+ pip install tensorflow_datasets==4.9.2 auditwheel tensorflow==2.18.0
2930git-clone.sh ${URLREF_TFTEXT} ${SRC_PATH_TFTEXT}
3031cd ${SRC_PATH_TFTEXT}
3132
3233# The tftext build script queries GitHub, but these requests are sometimes
3334# throttled by GH, resulting in a corrupted uri for tensorflow in WORKSPACE.
3435# A workaround (needs to be updated when the tensorflow version changes):
35- sed -i "s/# Update TF dependency to installed tensorflow/commit_sha=1cb1a030a62b169d90d34c747ab9b09f332bf905 /" oss_scripts/prepare_tf_dep.sh
36+ sed -i "s/# Update TF dependency to installed tensorflow./commit_slug=6550e4bd80223cdb8be6c3afd1f81e86a4d433c3/" oss_scripts/prepare_tf_dep.sh
3637
3738# Newer versions of LLVM make lld's --undefined-version check strict
3839# by default (https://reviews.llvm.org/D135402), but the tftext build seems to
4647# build lingvo
4748#------------------------------------------------------------------------------
4849
50+ # Remove Lingvo build from source when it has py-3.12 wheels for x86/arm64
4951FROM wheel-builder as lingvo-builder
5052ARG URLREF_LINGVO
5153ARG SRC_PATH_TFTEXT
@@ -55,15 +57,16 @@ ARG SRC_PATH_LINGVO
5557COPY --from=tftext-builder /opt/manifest.d/git-clone.yaml /opt/manifest.d/git-clone.yaml
5658COPY --from=tftext-builder ${SRC_PATH_TFTEXT}/tensorflow_text*.whl /opt/
5759
58- RUN <<"EOF" bash -exu -o pipefail
59- git-clone.sh ${URLREF_LINGVO} ${SRC_PATH_LINGVO}
60- EOF
61-
6260ENV USE_BAZEL_VERSION=7.1.2
61+
6362# build lingvo
6463RUN <<"EOF" bash -exu -o pipefail
64+ git-clone.sh ${URLREF_LINGVO} ${SRC_PATH_LINGVO}
6565pushd ${SRC_PATH_LINGVO}
6666
67+ CPU_ARCH="$(dpkg --print-architecture)"
68+ if [[ "${CPU_ARCH}" == "arm64" ]]; then
69+
6770# Use aarch distribution of protobufs
6871patch -p1 <<"EOFINNER"
6972diff --git a/lingvo/repo.bzl b/lingvo/repo.bzl
@@ -84,13 +87,34 @@ index ce65822d2..d9c0277aa 100644
8487 def icu():
8588EOFINNER
8689
87- pip install tensorflow_datasets==4.9.2 auditwheel tensorflow==2.13.0 /opt/tensorflow_text*.whl
88- sed -i 's/tensorflow=/#tensorflow=/' docker/dev.requirements.txt
89- sed -i 's/tensorflow-text=/#tensorflow-text=/' docker/dev.requirements.txt
90- sed -i 's/dataclasses=/#dataclasses=/' docker/dev.requirements.txt
90+ fi
91+
92+ pip install tensorflow_datasets==4.9.2 auditwheel tensorflow==2.18.0 /opt/tensorflow_text*.whl
93+ for pattern in \
94+ "s|tensorflow=|#tensorflow=|g" \
95+ "s|tensorflow-text=|#tensorflow-text=|g" \
96+ "s|dataclasses=|#dataclasses=|g" \
97+ "s|==.*||g" \
98+ ; do
99+ sed -i "${pattern}" ${SRC_PATH_LINGVO}/docker/dev.requirements.txt
100+ done
101+ # Lingvo's setup.py declares python_requires '>=3.8,<3.11' and pins TF 2.13,
102+ # so we patch those constraints to be able to build for py-3.12
103+ for pattern in \
104+ "s|tensorflow-text~=2.13.0|tensorflow-text~=2.18.0|g" \
105+ "s|tensorflow~=2.13.0|tensorflow~=2.18.0|g" \
106+ "s|python_requires='>=3.8,<3.11'|python_requires='>=3.8,<3.13'|" \
107+ ; do
108+ sed -i "${pattern}" ${SRC_PATH_LINGVO}/pip_package/setup.py;
109+ done
91110pip install -r docker/dev.requirements.txt
92111
93112# Some tests are flaky right now, so we skip running the tests.
113+ BUILD_ARCH="x86_64"
114+ if [[ "$CPU_ARCH" == "arm64" ]]; then
115+ BUILD_ARCH="aarch64";
116+ fi
117+ sed -i 's/manylinux2014_x86_64/manylinux_2_38_'"${BUILD_ARCH}"'/' pip_package/build.sh
94118SKIP_TESTS=1 PYTHON_MINOR_VERSION=$(python --version | cut -d ' ' -f 2 | cut -d '.' -f 2) pip_package/build.sh
95119EOF
96120
@@ -108,15 +132,14 @@ ARG SRC_PATH_TFTEXT
108132
109133# Preserve version information of tensorflow-text and lingvo
110134COPY --from=lingvo-builder /opt/manifest.d/git-clone.yaml /opt/manifest.d/git-clone.yaml
111- COPY --from=lingvo-builder /tmp/lingvo/dist/lingvo*linux_aarch64 .whl /opt/
135+ COPY --from=lingvo-builder /tmp/lingvo/dist/lingvo*-linux*.whl /opt/
112136RUN echo "lingvo @ file://$(ls /opt/lingvo*.whl)" >> /opt/pip-tools.d/requirements-paxml.in
113137
114138COPY --from=tftext-builder ${SRC_PATH_TFTEXT}/tensorflow_text*.whl /opt/
115139RUN echo "tensorflow-text @ file://$(ls /opt/tensorflow_text*.whl)" >> /opt/pip-tools.d/requirements-paxml.in
116140
117141# paxml + praxis
118142RUN <<"EOF" bash -ex
119- echo "tensorflow==2.13.0" >> /opt/pip-tools.d/requirements-paxml.in
120143echo "tensorflow_datasets==4.9.2" >> /opt/pip-tools.d/requirements-paxml.in
121144echo "auditwheel" >> /opt/pip-tools.d/requirements-paxml.in
122145
@@ -131,11 +154,14 @@ for src in ${SRC_PATH_PAXML} ${SRC_PATH_PRAXIS}; do
131154 for pattern in \
132155 "s| @ git+https://github.com/google/flax||g" \
133156 "s| @ git+https://github.com/google/jax||g" \
157+ "s| @ git+https://github.com/google/fiddle||g" \
134158 "s|^tensorflow|#tensorflow|" \
135159 "s|^lingvo|#lingvo|" \
136160 "s|^scikit-learn|#scikit-learn|" \
137161 "s|^protobuf|#protobuf|" \
138162 "s|^numpy|#numpy|" \
163+ "s|^orbax-checkpoint|#orbax-checkpoint|" \
164+ "s| @ git+https://github.com/google/CommonLoopUtils||g" \
139165 ; do
140166 sed -i "${pattern}" */pip_package/requirements.txt requirements.in
141167 done
@@ -148,6 +174,7 @@ for src in ${SRC_PATH_PAXML} ${SRC_PATH_PRAXIS}; do
148174 fi
149175 popd
150176done
177+ sed -i 's/pysimdjson==[0-9.]*/pysimdjson/' ${SRC_PATH_PAXML}/setup.py
151178EOF
152179
153180COPY test-pax.sh /usr/local/bin
@@ -159,3 +186,6 @@ ADD test-pax.sh /usr/local/bin
159186FROM mealkit as final
160187
161188RUN pip-finalize.sh
189+
190+ # When tftext and lingvo wheels are published on pypi.org, revert this
191+ # Dockerfile to 5c4b687b918e6569bca43758c346ad8e67460154
0 commit comments