Commit 8fef643

Add AIPCC-based Containerfile for Spark operator
Multi-stage Containerfile that uses quay.io/aipcc/base-images/cpu as the base image, with Java 21 copied from ubi9/openjdk-21, PySpark 4.0.1 installed from Red Hat's internal PyPI index, tini from EPEL, and OpenShift arbitrary-UID compatibility. Also updates params.env to reference the new image.
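The params.env change itself is not visible in this diff view; as a rough sketch, it would add or update a key/value entry pointing at the new image, along the lines of the following (key name and tag are placeholders, not taken from this commit):

    spark-operator-image=quay.io/<org>/spark-operator:<tag>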
1 parent 5d6b95f commit 8fef643

File tree

1 file changed: +125 -0 lines changed

examples/openshift/Dockerfile.odh

Lines changed: 125 additions & 0 deletions
@@ -0,0 +1,125 @@
# Dockerfile.odh for Spark Operator based on AIPCC base image
#
# Build: docker build -f examples/openshift/Dockerfile.odh -t <image> .
# Override base: --build-arg BASE_IMAGE=<other-image>

################################################################################
# Build Arguments
################################################################################
ARG GO_BUILDER_IMAGE=registry.access.redhat.com/ubi9/go-toolset:1.25.7
ARG JDK_IMAGE=registry.redhat.io/ubi9/openjdk-21:latest
ARG BASE_IMAGE=quay.io/aipcc/base-images/cpu:3.4.0-1773328752

################################################################################
# Stage 1: Build the Go operator binary
################################################################################
FROM ${GO_BUILDER_IMAGE} AS builder

USER 0
WORKDIR /workspace

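# Download Go modules in a cached layer. The cache and bind mounts
# require BuildKit (DOCKER_BUILDKIT=1) or a compatible builder such as
# buildah; binding only go.mod and go.sum keeps this layer cached until
# the dependency set itself changes.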
RUN --mount=type=cache,target=/go/pkg/mod/ \
    --mount=type=bind,source=go.mod,target=go.mod \
    --mount=type=bind,source=go.sum,target=go.sum \
    go mod download

COPY . .

ENV GOCACHE=/root/.cache/go-build
ARG TARGETARCH

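# Build a statically linked binary (CGO_ENABLED=0) for the architecture
# BuildKit supplies via TARGETARCH, so the same Dockerfile serves
# multi-arch builds.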
RUN --mount=type=cache,target=/go/pkg/mod/ \
    --mount=type=cache,target="/root/.cache/go-build" \
    CGO_ENABLED=0 GOOS=linux GOARCH=${TARGETARCH} GO111MODULE=on make build-operator

################################################################################
# Stage 2: JDK source (provides Java 21)
################################################################################
FROM ${JDK_IMAGE} AS jdk

################################################################################
# Stage 3: Runtime image
################################################################################
FROM ${BASE_IMAGE}

LABEL name="spark-operator" \
      summary="Spark Operator for OpenShift" \
      description="Kubeflow Spark Operator built on AIPCC base with Java 21 and PySpark 4.0.1" \
      io.k8s.display-name="Spark Operator" \
      io.k8s.description="Operator for managing Apache Spark applications on OpenShift"

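# UID/GID 185 is the conventional spark user in upstream Apache Spark
# container images.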
ARG SPARK_UID=185
ARG PYSPARK_VERSION=4.0.1
ARG PYSPARK_INDEX_URL=https://packages.redhat.com/api/pypi/public-rhai/rhoai/3.4-EA2/cpu-ubi9/simple/

USER 0

# Create the spark user/group (reuse UID/GID 185).
# The base image may already have a user at UID 185, so adjust accordingly.
RUN if getent group 185 > /dev/null 2>&1; then \
        EXISTING_GROUP=$(getent group 185 | cut -d: -f1); \
        groupmod -n spark "$EXISTING_GROUP"; \
    else \
        groupadd -g 185 spark; \
    fi && \
    if getent passwd 185 > /dev/null 2>&1; then \
        EXISTING_USER=$(getent passwd 185 | cut -d: -f1); \
        usermod -l spark -d /home/spark "$EXISTING_USER"; \
    else \
        useradd -u 185 -g spark -d /home/spark -s /bin/bash spark; \
    fi && \
    mkdir -p /home/spark && \
    chown spark:spark /home/spark

# Copy Java 21 from the JDK stage.
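# Besides the JVM itself, bring over the host pieces Java expects:
# timezone data (javazi-1.8), system-wide Java config, and the
# crypto-policy and PKI trust stores, so TLS and timezone lookups keep
# working on the AIPCC base.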
COPY --from=jdk /usr/lib/jvm/java-21-openjdk /usr/lib/jvm/java-21-openjdk
COPY --from=jdk /usr/share/javazi-1.8 /usr/share/javazi-1.8
COPY --from=jdk /etc/java /etc/java
COPY --from=jdk /etc/crypto-policies /etc/crypto-policies
COPY --from=jdk /etc/pki/java /etc/pki/java

ENV JAVA_HOME=/usr/lib/jvm/java-21-openjdk
ENV PATH="${JAVA_HOME}/bin:${PATH}"

# Install tini from EPEL
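# tini is a minimal init (PID 1) that forwards signals and reaps zombie
# processes; the verification steps below fail the build early if the
# Java or tini installs are broken.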
RUN dnf install -y https://dl.fedoraproject.org/pub/epel/epel-release-latest-9.noarch.rpm && \
    dnf install -y tini && \
    dnf clean all && rm -rf /var/cache/dnf/* && \
    java -version 2>&1 | grep "21\." && \
    tini --version

# Install PySpark from Red Hat's internal PyPI index
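# Note that --index-url replaces the default PyPI index entirely, so
# PySpark's transitive dependencies (e.g. py4j) must also be resolvable
# from this internal index.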
RUN pip3 install --no-cache-dir \
    --index-url ${PYSPARK_INDEX_URL} \
    pyspark==${PYSPARK_VERSION}

# Set up Spark directory structure via PySpark's installed location
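# The pyspark wheel ships a full Spark distribution (jars/, bin/,
# sbin/), so symlinking its install directory to /opt/spark yields a
# conventional SPARK_HOME layout without downloading Spark separately.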
ENV SPARK_HOME=/opt/spark
RUN PYSPARK_DIR=$(python3 -c "import pyspark; import os; print(os.path.dirname(pyspark.__file__))") && \
    ln -s ${PYSPARK_DIR} /opt/spark && \
    mkdir -p /opt/spark/work-dir /opt/spark/logs && \
    chmod g+w /opt/spark/work-dir && \
    touch /opt/spark/RELEASE && \
    chown -R spark:spark /opt/spark/work-dir /opt/spark/RELEASE

ENV PATH="${PATH}:${SPARK_HOME}/bin:${SPARK_HOME}/sbin"

# Webhook certs directory (needed by the operator's webhook server)
RUN mkdir -p /etc/k8s-webhook-server/serving-certs && \
    chmod -R g+rw /etc/k8s-webhook-server/serving-certs && \
    chown -R spark /etc/k8s-webhook-server/serving-certs /home/spark

# OpenShift arbitrary UID compatibility (GID 0 must have write access)
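# OpenShift assigns a random UID at runtime but always places it in the
# root group (GID 0); chmod g=u mirrors the owner's permissions onto
# the group so that arbitrary UID can write wherever the spark user can.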
RUN chgrp -R 0 /opt/spark /etc/k8s-webhook-server && \
    chmod -R g=u /opt/spark /etc/k8s-webhook-server && \
    chmod -R 775 /opt/spark/work-dir /opt/spark/logs

# Copy the operator binary from builder stage
COPY --from=builder /workspace/bin/spark-operator /usr/bin/spark-operator

# Copy operator entrypoint
COPY --chmod=0755 entrypoint.sh /usr/bin/

USER ${SPARK_UID}

ENTRYPOINT ["/usr/bin/entrypoint.sh"]
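As a quick smoke test (the image name below is a placeholder), the Java, PySpark, and tini installs can be verified without deploying the operator:

    docker run --rm --entrypoint /bin/bash <image> -c \
      'java -version && python3 -c "import pyspark; print(pyspark.__version__)" && tini --version'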
