# Dockerfile.odh for Spark Operator based on AIPCC base image
#
# Build: docker build -f examples/openshift/Dockerfile.odh -t <image> .
# Override base: --build-arg BASE_IMAGE=<other-image>
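#
# Note: the RUN --mount cache and bind mounts below require BuildKit; the
# legacy builder rejects them. A worked invocation (tag and build-arg value
# are illustrative):
#
#   DOCKER_BUILDKIT=1 docker build \
#     -f examples/openshift/Dockerfile.odh \
#     --build-arg BASE_IMAGE=quay.io/aipcc/base-images/cpu:3.4.0-1773328752 \
#     -t spark-operator:dev .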

################################################################################
# Build Arguments
################################################################################
ARG GO_BUILDER_IMAGE=registry.access.redhat.com/ubi9/go-toolset:1.25.7
ARG JDK_IMAGE=registry.redhat.io/ubi9/openjdk-21:latest
ARG BASE_IMAGE=quay.io/aipcc/base-images/cpu:3.4.0-1773328752

################################################################################
# Stage 1: Build the Go operator binary
################################################################################
FROM ${GO_BUILDER_IMAGE} AS builder

USER 0
WORKDIR /workspace

RUN --mount=type=cache,target=/go/pkg/mod/ \
    --mount=type=bind,source=go.mod,target=go.mod \
    --mount=type=bind,source=go.sum,target=go.sum \
    go mod download

COPY . .

ENV GOCACHE=/root/.cache/go-build
ARG TARGETARCH

RUN --mount=type=cache,target=/go/pkg/mod/ \
    --mount=type=cache,target="/root/.cache/go-build" \
    CGO_ENABLED=0 GOOS=linux GOARCH=${TARGETARCH} GO111MODULE=on make build-operator
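
# TARGETARCH is one of BuildKit's automatic platform ARGs; under a
# multi-platform build such as
#
#   docker buildx build --platform linux/amd64,linux/arm64 ...
#
# each target gets the matching value, so the same Dockerfile cross-compiles.
# With CGO_ENABLED=0 the binary is self-contained, so the runtime stage needs
# no Go toolchain or matching C library.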

################################################################################
# Stage 2: JDK source (provides Java 21)
################################################################################
FROM ${JDK_IMAGE} AS jdk

################################################################################
# Stage 3: Runtime image
################################################################################
FROM ${BASE_IMAGE}

LABEL name="spark-operator" \
      summary="Spark Operator for OpenShift" \
      description="Kubeflow Spark Operator built on AIPCC base with Java 21 and PySpark 4.0.1" \
      io.k8s.display-name="Spark Operator" \
      io.k8s.description="Operator for managing Apache Spark applications on OpenShift"

ARG SPARK_UID=185
ARG PYSPARK_VERSION=4.0.1
ARG PYSPARK_INDEX_URL=https://packages.redhat.com/api/pypi/public-rhai/rhoai/3.4-EA2/cpu-ubi9/simple/

USER 0

# Create the spark user and group at UID/GID 185. The base image may already
# have an entry at 185, so rename it to "spark" instead of failing on a
# duplicate ID.
RUN if getent group 185 > /dev/null 2>&1; then \
        EXISTING_GROUP=$(getent group 185 | cut -d: -f1); \
        groupmod -n spark "$EXISTING_GROUP"; \
    else \
        groupadd -g 185 spark; \
    fi && \
    if getent passwd 185 > /dev/null 2>&1; then \
        EXISTING_USER=$(getent passwd 185 | cut -d: -f1); \
        usermod -l spark -d /home/spark "$EXISTING_USER"; \
    else \
        useradd -u 185 -g spark -d /home/spark -s /bin/bash spark; \
    fi && \
    mkdir -p /home/spark && \
    chown spark:spark /home/spark
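
# UID 185 matches the "spark" user in upstream Apache Spark images, which
# keeps file ownership consistent between operator and application images.
# Optional build-time sanity check (commented out; illustrative):
#   RUN id spark   # expect: uid=185(spark) gid=185(spark) ...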

# Copy Java 21 from the JDK stage.
COPY --from=jdk /usr/lib/jvm/java-21-openjdk /usr/lib/jvm/java-21-openjdk
COPY --from=jdk /usr/share/javazi-1.8 /usr/share/javazi-1.8
COPY --from=jdk /etc/java /etc/java
COPY --from=jdk /etc/crypto-policies /etc/crypto-policies
COPY --from=jdk /etc/pki/java /etc/pki/java
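
# The supporting trees matter because the RHEL OpenJDK build resolves its CA
# trust store through /etc/pki/java and the system crypto policies, and its
# timezone data through javazi-1.8; copying only the JVM directory would
# leave TLS and timezone lookups broken.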

ENV JAVA_HOME=/usr/lib/jvm/java-21-openjdk
ENV PATH="${JAVA_HOME}/bin:${PATH}"

# Install tini from EPEL. It runs as PID 1 to forward signals and reap
# zombie processes left behind by spark-submit children.
RUN dnf install -y https://dl.fedoraproject.org/pub/epel/epel-release-latest-9.noarch.rpm && \
    dnf install -y tini && \
    dnf clean all && rm -rf /var/cache/dnf/* && \
    java -version 2>&1 | grep "21\." && \
    tini --version

# Install PySpark from Red Hat's internal PyPI index
RUN pip3 install --no-cache-dir \
    --index-url ${PYSPARK_INDEX_URL} \
    pyspark==${PYSPARK_VERSION}
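
# The index above is Red Hat-internal; outside that network, point the
# PYSPARK_INDEX_URL build arg at a reachable mirror. Whether a given mirror
# carries an equivalent pyspark 4.0.1 wheel is an assumption to verify, e.g.:
#
#   docker build --build-arg PYSPARK_INDEX_URL=https://pypi.org/simple/ ...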

# Set up Spark directory structure via PySpark's installed location
ENV SPARK_HOME=/opt/spark
RUN PYSPARK_DIR=$(python3 -c "import pyspark; import os; print(os.path.dirname(pyspark.__file__))") && \
    ln -s ${PYSPARK_DIR} /opt/spark && \
    mkdir -p /opt/spark/work-dir /opt/spark/logs && \
    chmod g+w /opt/spark/work-dir && \
    touch /opt/spark/RELEASE && \
    chown -R spark:spark /opt/spark/work-dir /opt/spark/RELEASE
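
# The pip-installed pyspark package ships bin/, sbin/, and the Spark jars, so
# symlinking it to /opt/spark yields a conventional SPARK_HOME without a
# separate Spark tarball. The empty RELEASE file mimics a packaged Spark
# distribution, whose scripts historically probe for that marker.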

ENV PATH="${PATH}:${SPARK_HOME}/bin:${SPARK_HOME}/sbin"

# Webhook certs directory (needed by the operator's webhook server)
RUN mkdir -p /etc/k8s-webhook-server/serving-certs && \
    chmod -R g+rw /etc/k8s-webhook-server/serving-certs && \
    chown -R spark /etc/k8s-webhook-server/serving-certs /home/spark

# OpenShift arbitrary UID compatibility (GID 0 must have write access)
RUN chgrp -R 0 /opt/spark /etc/k8s-webhook-server && \
    chmod -R g=u /opt/spark /etc/k8s-webhook-server && \
    chmod -R 775 /opt/spark/work-dir /opt/spark/logs
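
# Under OpenShift's restricted SCC, containers run as a random high UID whose
# primary group is 0, so "chmod g=u" gives that arbitrary UID the same access
# the spark user has. A quick local approximation (illustrative):
#
#   podman run --rm --user 100999:0 <image> touch /opt/spark/work-dir/probe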

# Copy the operator binary from the builder stage
COPY --from=builder /workspace/bin/spark-operator /usr/bin/spark-operator

# Copy the operator entrypoint
COPY --chmod=0755 entrypoint.sh /usr/bin/

USER ${SPARK_UID}

ENTRYPOINT ["/usr/bin/entrypoint.sh"]
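
# entrypoint.sh is supplied by the repository and not shown here. A minimal
# sketch of what such a wrapper typically does, assuming the tini and
# operator paths installed above (this is not the repository's actual file):
#
#   #!/usr/bin/env bash
#   # Give an arbitrary, unlisted UID a passwd entry so user lookups succeed.
#   myuid="$(id -u)"
#   if ! getent passwd "$myuid" > /dev/null 2>&1 && [ -w /etc/passwd ]; then
#       echo "spark:x:${myuid}:0:anonymous uid:/home/spark:/bin/bash" >> /etc/passwd
#   fi
#   # Run the operator under tini so signals reach it and children are reaped.
#   exec /usr/bin/tini -- /usr/bin/spark-operator "$@"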