Skip to content

Commit 64966c5

Browse files
EmmaQiaoCh and bashimao authored
Pull request/1064 (#1069)
* Allow building Merlin with older versions of `docker`.
* Did not copy perf_analyzer.
* Comments to explain what we do there.
* Properly fix the perf_analyzer issue.
* `python-libnvinfer` is finally delivered properly on ARM64.
* Fix HugeCTR compilation issues in TF image.
---------
Co-authored-by: Matthias Langer <[email protected]>
1 parent 58ceea4 commit 64966c5

File tree

2 files changed

+16
-14
lines changed

2 files changed

+16
-14
lines changed

docker/dockerfile.merlin

Lines changed: 13 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -3,10 +3,12 @@ ARG TRITON_VERSION=23.06
33
ARG DLFW_VERSION=23.06
44

55
ARG FULL_IMAGE=nvcr.io/nvidia/tritonserver:${TRITON_VERSION}-py3
6+
ARG SDK_IMAGE=nvcr.io/nvidia/tritonserver:${TRITON_VERSION}-py3-sdk
67
ARG BASE_IMAGE=nvcr.io/nvidia/tritonserver:${TRITON_VERSION}-py3-min
78
ARG DLFW_IMAGE=nvcr.io/nvidia/tensorflow:${TRITON_VERSION}-tf2-py3
89

910
FROM ${FULL_IMAGE} as triton
11+
FROM ${SDK_IMAGE} as sdk
1012
FROM ${DLFW_IMAGE} as dlfw
1113
FROM ${BASE_IMAGE} as build
1214

@@ -118,8 +120,9 @@ COPY --chown=1000:1000 --from=triton /opt/tritonserver/lib lib/
118120
COPY --chown=1000:1000 --from=triton /opt/tritonserver/include include/
119121
COPY --chown=1000:1000 --from=triton /opt/tritonserver/repoagents/ repoagents/
120122
COPY --chown=1000:1000 --from=triton /opt/tritonserver/backends/python backends/
121-
# NOTE 2023-07: fil-backend is not available on ARM.
122-
COPY --chown=1000:1000 --from=triton /opt/tritonserver/backends/fil* backends/fil/
123+
# NOTE 2023-09: fil-backend is not available on ARM. Some docker versions flag an error if there is
124+
# not a single source file to copy. To avoid this, we also specify a small dummy file.
125+
COPY --chown=1000:1000 --from=triton /opt/tritonserver/LICENSE /opt/tritonserver/backends/fil/* backends/fil/
123126
COPY --chown=1000:1000 --from=triton /usr/bin/serve /usr/bin/.
124127

125128
ENV PATH=/opt/tritonserver/bin:${PATH}:
@@ -187,11 +190,12 @@ RUN ARCH=$([ "${TARGETARCH}" = "arm64" ] && echo "sbsa" || echo "x86_64") && \
187190
python3 \
188191
python3-pip \
189192
python3-dev \
193+
python3-libnvinfer \
190194
rapidjson-dev \
191195
tree \
192196
wget \
193197
zlib1g-dev \
194-
# Required to build RocksDB and RdKafka..
198+
# Required to build RocksDB and RdKafka.
195199
libgflags-dev \
196200
libbz2-dev \
197201
libsnappy-dev \
@@ -208,11 +212,6 @@ RUN ARCH=$([ "${TARGETARCH}" = "arm64" ] && echo "sbsa" || echo "x86_64") && \
208212
openssh-server \
209213
# [ HugeCTR ]
210214
libaio-dev && \
211-
# NOTE: libnvinfer is installed anyway, just Python bindings are missing on ARM.
212-
if [[ "$TARGETARCH" != "arm64" ]]; then \
213-
# TensorRT dependencies
214-
apt install -y --no-install-recommends python3-libnvinfer \
215-
; fi && \
216215
apt autoremove -y && \
217216
apt clean && \
218217
rm -rf /var/lib/apt/lists/*
@@ -225,7 +224,7 @@ ENV LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:${JAVA_HOME}/lib:${JAVA_HOME}/lib/server
225224
# Binaries
226225
COPY --chown=1000:1000 --from=build /usr/local/bin/cmake /usr/local/bin/
227226
COPY --chown=1000:1000 --from=build /usr/local/bin/pytest /usr/local/bin/
228-
COPY --chown=1000:1000 --from=build /usr/local/bin/perf_* /usr/local/bin/
227+
COPY --chown=1000:1000 --from=sdk /usr/local/bin/perf_* /usr/local/bin/
229228

230229
# Triton Server
231230
WORKDIR /opt/tritonserver
@@ -237,8 +236,9 @@ COPY --chown=1000:1000 --from=triton /opt/tritonserver/lib lib/
237236
COPY --chown=1000:1000 --from=triton /opt/tritonserver/include include/
238237
COPY --chown=1000:1000 --from=triton /opt/tritonserver/repoagents/ repoagents/
239238
COPY --chown=1000:1000 --from=triton /opt/tritonserver/backends/python backends/python/
240-
# NOTE 2023-07: fil-backend is not available on ARM.
241-
COPY --chown=1000:1000 --from=triton /opt/tritonserver/backends/fil* backends/fil/
239+
# NOTE 2023-09: fil-backend is not available on ARM. Some docker versions flag an error if there is
240+
# not a single source file to copy. To avoid this, we also specify a small dummy file.
241+
COPY --chown=1000:1000 --from=triton /opt/tritonserver/LICENSE /opt/tritonserver/backends/fil/* backends/fil/
242242
COPY --chown=1000:1000 --from=triton /opt/tritonserver/backends/tensorrt backends/tensorrt/
243243
COPY --chown=1000:1000 --from=triton /usr/bin/serve /usr/bin/.
244244
COPY --chown=1000:1000 --from=triton /usr/lib/*-linux-gnu/libdcgm.so.2 /tmp
@@ -362,7 +362,7 @@ ENV PATH=${PATH}:${HADOOP_HOME}/bin:${HADOOP_HOME}/sbin \
362362
YARN_NODEMANAGER_USER=root \
363363
# Works around ThreadReaper stack overflow issues: https://bugs.openjdk.java.net/browse/JDK-8153057
364364
LIBHDFS_OPTS='-Djdk.lang.processReaperUseDefaultStackSize=true' \
365-
# Tackles with JVM setting error signals that UCX library will check (GitLab issue #425).
365+
# Works around the JVM setting error signals that the UCX library checks (GitLab issue #425).
366366
UCX_ERROR_SIGNALS='' \
367367
CLASSPATH=${CLASSPATH}:\
368368
${HADOOP_HOME}/etc/hadoop/*:\
@@ -389,7 +389,7 @@ ENV PATH=$PATH:${HUGECTR_HOME}/bin \
389389
LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:${HUGECTR_HOME}/lib
390390

391391
RUN if [ "${HUGECTR_DEV_MODE}" == "false" ]; then \
392-
# Install HugeCTR inference which is dependency for hps_backenc
392+
# Install HugeCTR inference, which is a dependency for hps_backend
393393
git clone --branch ${HUGECTR_VER} --depth 1 https://${_CI_JOB_TOKEN}${_HUGECTR_REPO} /hugectr && \
394394
cd /hugectr && \
395395
git submodule update --init --recursive && \

docker/dockerfile.tf

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,6 @@ ARG _CI_JOB_TOKEN=""
4242
ARG HUGECTR_VER=main
4343

4444
ENV LD_LIBRARY_PATH=/usr/local/lib/python${PYTHON_VERSION}/dist-packages/tensorflow:$LD_LIBRARY_PATH \
45-
LIBRARY_PATH=${HUGECTR_HOME}/lib:$LIBRARY_PATH \
4645
SOK_COMPILE_UNIT_TEST=ON
4746

4847
RUN mkdir -p /usr/local/nvidia/lib64 && \
@@ -55,6 +54,9 @@ ARG INSTALL_DISTRIBUTED_EMBEDDINGS=false
5554
ARG TFDE_VER=v23.03.00
5655
5756
RUN if [ "$HUGECTR_DEV_MODE" == "false" ]; then \
57+
export HUGECTR_HOME=/usr/local/hugectr && \
58+
rm -rf ${HUGECTR_HOME}/lib/libgmock* ${HUGECTR_HOME}/lib/pkgconfig/gmock* ${HUGECTR_HOME}/include/gmock && \
59+
rm -rf ${HUGECTR_HOME}/lib/libgtest* ${HUGECTR_HOME}/lib/pkgconfig/gtest* ${HUGECTR_HOME}/include/gtest && \
5860
git clone --branch ${HUGECTR_VER} --depth 1 --recurse-submodules --shallow-submodules https://${_CI_JOB_TOKEN}${_HUGECTR_REPO} /hugectr && \
5961
pushd /hugectr && \
6062
rm -rf .git/modules && \

0 commit comments

Comments
 (0)