FROM centos:7

# CentOS 7 is EOL: mirror.centos.org/mirrorlist are gone, so point every repo
# at vault.centos.org and force https baseurls.
# NOTE(review): "sslverify=false" weakens transport security for all yum
# operations — confirm this is acceptable for this build environment.
RUN echo "sslverify=false" >> /etc/yum.conf && \
    sed -i -e "s/mirror.centos.org/vault.centos.org/g" \
           -e "s/^#.*baseurl=http/baseurl=https/g" \
           -e "s/^mirrorlist/#mirrorlist/g" /etc/yum.repos.d/*.repo

# install common tools; centos-release-scl/epel-release drop in new repo
# files, so the vault rewrite must be repeated for them before the main
# install. Clean yum metadata in the same layer to keep the image small.
RUN yum update -y && \
    yum install -y centos-release-scl epel-release && \
    sed -i -e "s/mirror.centos.org/vault.centos.org/g" \
           -e "s/^#.*baseurl=http/baseurl=https/g" \
           -e "s/^mirrorlist/#mirrorlist/g" /etc/yum.repos.d/*.repo && \
    yum install -y \
        clang \
        clang-devel \
        cmake3 \
        gcc \
        git \
        krb5-workstation \
        libzip \
        llvm \
        openssl-devel \
        unzip \
        wget && \
    yum clean all
# install gcc-11 (devtoolset) and llvm-toolset-7 from SCL in one layer, and
# enable both for interactive shells via ~/.bashrc
RUN yum install -y \
        devtoolset-11-gcc \
        devtoolset-11-gcc-c++ \
        llvm-toolset-7 && \
    yum clean all && \
    echo '. /opt/rh/devtoolset-11/enable' >> ~/.bashrc && \
    echo '. /opt/rh/llvm-toolset-7/enable' >> ~/.bashrc

# install rust nightly toolchain (pinned date for reproducibility);
# pipefail so a failed download cannot be silently piped into sh
RUN set -o pipefail && \
    curl -fsSL https://sh.rustup.rs | sh -s -- -y --default-toolchain nightly-2025-06-01
ENV PATH="/root/.cargo/bin:${PATH}"
# sanity check: the toolchain is on PATH and runnable
RUN rustc --version
# install java 8 (the JDK provides libjvm.so and tools used by the hadoop
# classpath step below); clean metadata in the same layer
RUN yum install -y java-1.8.0-openjdk java-1.8.0-openjdk-devel && \
    yum clean all
ENV JAVA_HOME="/usr/lib/jvm/java-1.8.0-openjdk"
RUN echo "export JAVA_HOME=${JAVA_HOME}" >> ~/.bashrc

# install maven
# RUN yum install -y rh-maven35
# RUN echo 'source /opt/rh/rh-maven35/enable' >> ~/.bashrc
# install protoc from the pinned release binary; remove the zip in the same
# layer so it does not persist in the image (the original left it at /)
ARG PROTOC_VERSION=21.7
RUN wget -q -O /tmp/protoc.zip \
        https://github.com/protocolbuffers/protobuf/releases/download/v${PROTOC_VERSION}/protoc-${PROTOC_VERSION}-linux-x86_64.zip && \
    mkdir -p /protobuf-bin && \
    unzip /tmp/protoc.zip -d /protobuf-bin && \
    rm -f /tmp/protoc.zip
ENV PATH="/protobuf-bin/bin:${PATH}"
RUN echo 'export PATH="/protobuf-bin/bin:$PATH"' >> ~/.bashrc

# attach libjvm.so for shells (JNI-based HDFS clients need it at runtime)
RUN echo 'export LD_LIBRARY_PATH=${JAVA_HOME}/jre/lib/amd64/server:${LD_LIBRARY_PATH}' >> ~/.bashrc

# setup hadoop env; pipefail so a failed download fails the build instead of
# feeding a truncated stream to tar
RUN set -o pipefail && \
    curl -LsSf https://dlcdn.apache.org/hadoop/common/hadoop-3.3.5/hadoop-3.3.5.tar.gz | tar zxf - -C /root
ENV HADOOP_HOME=/root/hadoop-3.3.5
# CLASSPATH is expanded at *build* time ($(...) runs in this RUN's shell), so
# the generated ~/.bashrc carries a fully resolved jar list
RUN echo "export HADOOP_HOME=${HADOOP_HOME}" >> ~/.bashrc && \
    echo "export CLASSPATH=$(${HADOOP_HOME}/bin/hadoop classpath --glob)" >> ~/.bashrc && \
    echo "export HDRS_NAMENODE=default" >> ~/.bashrc && \
    echo "export HDRS_WORKDIR=/tmp/hdrs/" >> ~/.bashrc
# install python/pip plus netcat (nc is used for port checking); keep pip's
# wheel cache out of the layer with --no-cache-dir
RUN yum install -y python3 python3-pip nc && \
    yum clean all && \
    pip3 install --no-cache-dir --upgrade pip
# install spark: fetch the pinned release, unpack it to /opt/spark, and drop
# the tarball in the same layer
ARG SPARK_VERSION=3.5.0
RUN cd /opt \
    && wget -q "https://archive.apache.org/dist/spark/spark-${SPARK_VERSION}/spark-${SPARK_VERSION}-bin-hadoop3.tgz" \
    && tar -xzf "spark-${SPARK_VERSION}-bin-hadoop3.tgz" \
    && mv "spark-${SPARK_VERSION}-bin-hadoop3" spark \
    && rm "spark-${SPARK_VERSION}-bin-hadoop3.tgz"
ENV SPARK_HOME=/opt/spark
ENV PATH="${SPARK_HOME}/bin:${PATH}"
| 70 | +# install uniffle coordinator (must be before configuring it) |
| 71 | +ARG UNIFFLE_VERSION=0.10.0 |
| 72 | +RUN cd /opt && \ |
| 73 | + wget -q https://archive.apache.org/dist/uniffle/${UNIFFLE_VERSION}/apache-uniffle-${UNIFFLE_VERSION}-bin.tar.gz && \ |
| 74 | + tar zxvf apache-uniffle-${UNIFFLE_VERSION}-bin.tar.gz && mv apache-uniffle-${UNIFFLE_VERSION}-hadoop2.8 uniffle && \ |
| 75 | + rm -f apache-uniffle-${UNIFFLE_VERSION}-bin.tar.gz |
| 76 | +ENV UNIFFLE_HOME=/opt/uniffle |
| 77 | +ENV PATH="${UNIFFLE_HOME}/bin:${PATH}" |
| 78 | + |
| 79 | +# configure uniffle coordinator (after UNIFFLE_HOME is set and uniffle is installed) |
| 80 | +RUN mkdir -p ${UNIFFLE_HOME}/conf |
| 81 | +COPY coordinator.conf ${UNIFFLE_HOME}/conf/coordinator.conf |
| 82 | + |
| 83 | +# download uniffle spark client |
| 84 | +RUN mkdir -p ${SPARK_HOME}/jars && \ |
| 85 | + wget -q https://repo1.maven.org/maven2/org/apache/uniffle/rss-client-spark3-shaded/${UNIFFLE_VERSION}/rss-client-spark3-shaded-${UNIFFLE_VERSION}.jar \ |
| 86 | + -O ${SPARK_HOME}/jars/rss-client.jar || \ |
| 87 | + echo "Failed to download Uniffle Spark Client" |
| 88 | + |
# riffle home directory with per-server configs placed under RIFFLE_HOME/conf
ENV RIFFLE_HOME=/opt/riffle
RUN mkdir -p "${RIFFLE_HOME}/conf"
COPY riffle.conf.1 ${RIFFLE_HOME}/conf/riffle.conf.1
COPY riffle.conf.2 ${RIFFLE_HOME}/conf/riffle.conf.2

# point spark at riffle via spark-defaults.conf
COPY spark-defaults.conf ${SPARK_HOME}/conf/spark-defaults.conf

# container endpoint script, made executable in place
COPY endpoint.sh /usr/local/bin/endpoint.sh
RUN chmod +x /usr/local/bin/endpoint.sh
# persist tool locations for interactive shells; single-quoted PYTHONPATH
# line defers ${...} expansion to shell startup (as the original's \${...} did)
RUN echo "export SPARK_HOME=${SPARK_HOME}" >> ~/.bashrc && \
    echo "export UNIFFLE_HOME=${UNIFFLE_HOME}" >> ~/.bashrc && \
    echo "export RIFFLE_HOME=${RIFFLE_HOME}" >> ~/.bashrc && \
    echo 'export PYTHONPATH=${SPARK_HOME}/python:${SPARK_HOME}/python/lib/py4j-*.zip:${PYTHONPATH}' >> ~/.bashrc && \
    echo "export RUST_BACKTRACE=1" >> ~/.bashrc

# make hadoop's native libraries visible to the dynamic linker for ALL
# processes, not just bash sessions that source ~/.bashrc. LD_LIBRARY_PATH is
# unset at this point, so the original ":$LD_LIBRARY_PATH" suffix produced a
# trailing empty entry (CWD in the search path) — set the path alone.
ENV LD_LIBRARY_PATH=${HADOOP_HOME}/lib/native
RUN echo "${HADOOP_HOME}/lib/native" >> /etc/ld.so.conf.d/hadoop.conf && ldconfig

# Set entrypoint last (no WORKDIR needed as endpoint.sh handles paths)
ENTRYPOINT ["/usr/local/bin/endpoint.sh"]