Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -22,3 +22,7 @@ lm_eval/caching/.cache
# don't track files created by wandb
wandb
examples/wandb
.hermeto/
.hermeto.env
hermeto-output/
.build-config.json
10 changes: 9 additions & 1 deletion .tekton/odh-ta-lmes-job-pull-request.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,13 @@ spec:
- name: path-context
value: .
- name: hermetic
value: false
value: "true"
- name: prefetch-input
value: |
[
{"type": "pip", "path": ".", "requirements_files": ["requirements/requirements-build.txt", "requirements/requirements-cuda.txt", "requirements/requirements-cpu.txt"], "binary": {"arch": ":all:"}},
{"type": "rpm", "path": "requirements"}
]
- name: build-image-index
value: true
- name: build-platforms
Expand All @@ -49,6 +55,8 @@ spec:
- linux/s390x
- name: image-expires-after
value: 5d
- name: allow-cross-platform-images
value: "true"
- name: enable-slack-failure-notification
value: "false"
taskRunSpecs:
Expand Down
234 changes: 20 additions & 214 deletions Dockerfile.konflux.lmes-job
Original file line number Diff line number Diff line change
@@ -1,236 +1,42 @@
###############################################################
# Stage 1 Base builder image with common tooling
###############################################################
FROM quay.io/aipcc/base-images/cpu:3.5.0@sha256:a398ebc954964790a18157c50bf36656f358005786e46273b87f885d78909d5d AS cpu-base
FROM quay.io/aipcc/base-images/cuda-12.9-el9.6:3.5.0@sha256:49dd116addf206d555b6c9532cd9d90f93730d8db4363ffe9b8901c34ecd51e1 AS cuda-base

FROM registry.access.redhat.com/ubi9/python-311:latest AS builder
USER root

ENV PATH="$HOME/.cargo/bin:$PATH"
ENV LD_LIBRARY_PATH=/usr/local/lib64:/usr/local/lib:/usr/lib64:/usr/lib
ENV PKG_CONFIG_PATH=/usr/local/lib/pkgconfig/

ARG TARGETARCH
# Install dependencies
RUN dnf install -y skopeo && dnf clean all
RUN if [ "$TARGETARCH" = "s390x" ]; then \
dnf install -y gcc-toolset-13 make wget unzip rust cargo gcc-gfortran openblas-devel pkgconfig && \
dnf clean all ; \
elif [ "$TARGETARCH" = "ppc64le" ]; then \
dnf install -y gcc-toolset-13 make wget unzip rust cargo && \
dnf clean all ; \
fi && \
pip install --upgrade pip 'cmake<4' setuptools wheel

COPY requirements.txt .

# Create a dummy file to trigger build dependency
RUN touch /tmp/control

###############################################################
# Stage 2 to build Arrow
###############################################################

FROM builder AS arrow-builder
USER root
FROM cpu-base AS base-ppc64le
FROM cpu-base AS base-s390x
FROM cuda-base AS base-amd64
FROM cuda-base AS base-arm64

ARG TARGETARCH
RUN mkdir -p /arrowwheels

RUN if [ "$TARGETARCH" = "ppc64le" ] ; then \
PYARROW_VERSION=$(sed -n 's/^pyarrow==\([^ ;]*\).*/\1/p' requirements.txt) ; \
if [ -z "$PYARROW_VERSION" ]; then \
echo "ERROR: pyarrow version not found in requirements.txt" >&2 ; \
exit 1 ; \
fi ; \
export PYARROW_VERSION ; \
git clone -b apache-arrow-${PYARROW_VERSION} https://github.com/apache/arrow.git --recursive && \
cd arrow && rm -rf .git && mkdir dist && \
pip3 install -r python/requirements-build.txt && \
export ARROW_HOME=$(pwd)/dist && \
export LD_LIBRARY_PATH=$(pwd)/dist/lib:$LD_LIBRARY_PATH && \
export CMAKE_PREFIX_PATH=$ARROW_HOME:$CMAKE_PREFIX_PATH && \
export PARQUET_TEST_DATA="${PWD}/cpp/submodules/parquet-testing/data" && \
export ARROW_TEST_DATA="${PWD}/testing/data" && \
cmake -S cpp -B cpp/build \
-DCMAKE_INSTALL_PREFIX=$ARROW_HOME \
-DCMAKE_BUILD_TYPE=release \
-DARROW_WITH_BZ2=ON \
-DARROW_WITH_ZLIB=ON \
-DARROW_WITH_ZSTD=ON \
-DARROW_WITH_LZ4=ON \
-DARROW_WITH_SNAPPY=ON \
-DARROW_WITH_BROTLI=ON \
-DARROW_DATASET=ON \
-DARROW_FILESYSTEM=ON \
-DARROW_COMPUTE=ON \
-DARROW_JSON=ON \
-DARROW_CSV=ON \
-DARROW_PYTHON=ON \
-DARROW_PARQUET=ON \
-DARROW_ORC=ON \
-DARROW_BUILD_SHARED=ON \
-DARROW_BUILD_TESTS=OFF && \
cd cpp/build && \
make -j20 install && \
export PYARROW_PARALLEL=20 && \
export PYARROW_WITH_PARQUET=1 && \
export PYARROW_WITH_DATASET=1 && \
export PYARROW_BUNDLE_ARROW_CPP=1 && \
pip3 install wheel && \
cd ../../python && \
python setup.py build_ext \
--build-type=release \
--bundle-arrow-cpp \
bdist_wheel --dist-dir /arrowwheels/ \
; fi

COPY patches/s390x/parquet-support.patch /tmp/s390x-parquet.patch

RUN if [ "$TARGETARCH" = "s390x" ] ; then \
git clone https://github.com/apache/arrow.git --recursive && \
cd arrow && git checkout 7c3d4867e40dd0100542247a61cb83520369b2d4 && \
git apply /tmp/s390x-parquet.patch && \
rm -rf .git && mkdir dist && \
pip3 install -r python/requirements-build.txt && \
export ARROW_HOME=$(pwd)/dist && \
export LD_LIBRARY_PATH=$(pwd)/dist/lib:$LD_LIBRARY_PATH && \
export CMAKE_PREFIX_PATH=$ARROW_HOME:$CMAKE_PREFIX_PATH && \
export PARQUET_TEST_DATA="${PWD}/cpp/submodules/parquet-testing/data" && \
export ARROW_TEST_DATA="${PWD}/testing/data" && \
cmake -S cpp -B cpp/build \
-DCMAKE_INSTALL_PREFIX=$ARROW_HOME \
-DCMAKE_BUILD_TYPE=release \
-DARROW_WITH_BZ2=ON \
-DARROW_WITH_ZLIB=ON \
-DARROW_WITH_ZSTD=ON \
-DARROW_WITH_LZ4=ON \
-DARROW_WITH_SNAPPY=ON \
-DARROW_WITH_BROTLI=ON \
-DARROW_DATASET=ON \
-DARROW_FILESYSTEM=ON \
-DARROW_COMPUTE=ON \
-DARROW_JSON=ON \
-DARROW_CSV=ON \
-DARROW_PYTHON=ON \
-DARROW_PARQUET=ON \
-DARROW_ORC=OFF \
-DARROW_BUILD_SHARED=ON \
-DARROW_BUILD_TESTS=OFF && \
cd cpp/build && \
make -j20 install && \
export PYARROW_PARALLEL=20 && \
export PYARROW_WITH_PARQUET=1 && \
export PYARROW_WITH_DATASET=1 && \
export PYARROW_BUNDLE_ARROW_CPP=1 && \
pip3 install wheel && \
cd ../../python && \
python setup.py build_ext \
--build-type=release \
--bundle-arrow-cpp \
bdist_wheel --dist-dir /arrowwheels/ \
; fi

###############################################################
# Stage 3 to build PyTorch
###############################################################

FROM builder AS torch-builder
FROM base-${TARGETARCH}
USER root

ARG MAX_JOBS
ARG _GLIBCXX_USE_CXX11_ABI=1
RUN mkdir -p /torchwheels

ARG TARGETARCH
RUN if [ "$TARGETARCH" = "s390x" ] || [ "$TARGETARCH" = "ppc64le" ]; then \
TORCH_VERSION=$(sed -n 's/^torch==\([^ ;]*\).*/\1/p' requirements.txt) ; \
if [ -z "$TORCH_VERSION" ]; then \
echo "ERROR: torch version not found in requirements.txt" >&2 ; \
exit 1 ; \
fi ; \
export TORCH_VERSION ; \
source /opt/rh/gcc-toolset-13/enable && \
git clone --recursive https://github.com/pytorch/pytorch.git -b v${TORCH_VERSION} && \
cd pytorch && pip install -r requirements.txt && \
python setup.py develop && \
rm -f dist/torch*+git*whl && \
PYTORCH_BUILD_VERSION=${TORCH_VERSION} PYTORCH_BUILD_NUMBER=1 pip wheel . --wheel-dir /torchwheels/ \
; fi

###############################################################
# Stage 4 to build OpenBLAS
###############################################################
FROM builder AS openblas-builder
ENV OPENBLAS_VERSION=0.3.30
ARG TARGETARCH

WORKDIR /root

# Creating a directory for OpenBlas
RUN mkdir /tmp/openblas

RUN if [ "$TARGETARCH" = "ppc64le" ]; then \
source /opt/rh/gcc-toolset-13/enable && \
wget https://github.com/OpenMathLib/OpenBLAS/releases/download/v${OPENBLAS_VERSION}/OpenBLAS-${OPENBLAS_VERSION}.zip && \
unzip OpenBLAS-${OPENBLAS_VERSION}.zip -d /tmp/ && mv -T /tmp/OpenBLAS-${OPENBLAS_VERSION} /tmp/openblas && \
cd /tmp/openblas && \
make -j$(nproc) TARGET=POWER9 BINARY=64 USE_OPENMP=1 USE_THREAD=1 NUM_THREADS=120 DYNAMIC_ARCH=1 INTERFACE64=0 ; \
fi

###############################################################
# Stage 5 to build Final Image
###############################################################

FROM builder AS final-build
USER root

ARG TARGETARCH
ENV PKG_CONFIG_PATH=/usr/local/lib/pkgconfig/
ENV PATH="$HOME/.cargo/bin:$PATH"
ENV LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/lib64:/usr/local/lib:/usr/lib64:/usr/lib

# Dummy file to trigger build dependency
COPY --from=torch-builder /tmp/control /dev/null
COPY --from=arrow-builder /tmp/control /dev/null

# Copy built OpenBLAS sources
COPY --from=openblas-builder /tmp/openblas/ /openblas

# Install PyTorch and Arrow wheels
ARG TARGETARCH
RUN --mount=type=cache,from=torch-builder,source=/torchwheels/,target=/torchwheels/,ro \
--mount=type=cache,from=arrow-builder,source=/arrowwheels/,target=/arrowwheels/,ro \
if [ "$TARGETARCH" = "s390x" ] || [ "$TARGETARCH" = "ppc64le" ]; then \
HOME=/root pip install /arrowwheels/*.whl /torchwheels/*.whl ; \
fi

# Install OpenBLAS
RUN if [ "$TARGETARCH" = "ppc64le" ]; then \
PREFIX=/usr/local make -C /openblas install && rm -rf /openblas ; \
fi

RUN sed -i.bak 's/include-system-site-packages = false/include-system-site-packages = true/' /opt/app-root/pyvenv.cfg

# Copy license
COPY LICENSE.md /licenses/lm-evaluation-harness.md

WORKDIR /opt/app-root/src
COPY . .

# Copy the pinned requirements file
COPY requirements.txt .
RUN dnf install -y skopeo && \
dnf clean all

# Install the pinned dependencies
RUN if [ "$TARGETARCH" = "s390x" ]; then \
grep -v -E "^(pyarrow|torch)[>=<! ]" requirements.txt > /tmp/requirements_filtered.txt && \
pip install --no-cache-dir -r /tmp/requirements_filtered.txt ; \
RUN pip install --no-cache-dir --no-deps --require-hashes \
-r requirements/requirements-build.txt

RUN if [ "$TARGETARCH" = "amd64" ] || [ "$TARGETARCH" = "arm64" ]; then \
pip install --no-cache-dir --no-deps --require-hashes \
-r requirements/requirements-cuda.txt; \
else \
pip install --no-cache-dir -r requirements.txt ; \
pip install --no-cache-dir --no-deps --require-hashes \
-r requirements/requirements-cpu.txt; \
fi

# Install the package
RUN pip install --no-cache-dir --no-deps -e .

RUN chmod -R og+rX /opt/app-root/src/

USER default

RUN mkdir /opt/app-root/src/hf_home && chmod g+rwx /opt/app-root/src/hf_home && \
Expand All @@ -243,7 +49,7 @@ ENV PATH="/opt/app-root/bin:/opt/app-root/src/.local/bin/:/opt/app-root/src/bin:

RUN python -c 'from lm_eval.tasks.unitxt import task; import os.path; print("class: !function " + task.__file__.replace("task.py", "task.Unitxt"))' > ./my_tasks/unitxt

ENV PYTHONPATH=/opt/app-root/src/.local/lib/python3.11/site-packages:/opt/app-root/src/lm-evaluation-harness:/opt/app-root/src:/opt/app-root/src/server
ENV PYTHONPATH=/opt/app-root/src/.local/lib/python3.12/site-packages:/opt/app-root/src/lm-evaluation-harness:/opt/app-root/src:/opt/app-root/src/server
ENV HF_HOME=/opt/app-root/src/hf_home
ENV UNITXT_CATALOGS=/opt/app-root/src/my_catalogs

Expand Down
Loading
Loading