diff --git a/etc/compose/hive4.0-hive/docker-compose.yml b/etc/compose/hive4.0-hive/docker-compose.yml
new file mode 100644
index 0000000..37d2947
--- /dev/null
+++ b/etc/compose/hive4.0-hive/docker-compose.yml
@@ -0,0 +1,5 @@
+services:
+  hadoop-master:
+    hostname: hadoop-master
+    image: prestodb/hive4.0-hive:latest
+
diff --git a/prestodb/hive4.0-hive/Dockerfile b/prestodb/hive4.0-hive/Dockerfile
new file mode 100644
index 0000000..0914c55
--- /dev/null
+++ b/prestodb/hive4.0-hive/Dockerfile
@@ -0,0 +1,77 @@
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+FROM prestodb/centos7-oj8:latest
+LABEL maintainer="Presto community "
+
+ARG HADOOP_VERSION=3.4.1
+ARG HIVE_VERSION=4.0.1
+ARG MYSQL_CONNECTOR_VERSION=8.0.24
+ARG AWS_SDK_VERSION=1.12.782
+
+ENV HADOOP_HOME=/opt/hadoop
+ENV HIVE_HOME=/opt/hive
+ENV HADOOP_CLASSPATH=${HADOOP_HOME}/share/hadoop/tools/lib/*
+ENV PATH=${HIVE_HOME}/bin:${HADOOP_HOME}/bin:${PATH}
+
+# Copy configuration files
+COPY ./files /tmp/files/
+
+# Every curl below runs with -f so an HTTP error aborts the build instead of
+# saving the server's error page as the archive or jar; -L follows redirects.
+RUN yum install -y \
+        mariadb-server \
+        openssh \
+        openssh-clients \
+        openssh-server \
+        psmisc \
+        which && \
+    # setup ssh server for sock proxy
+    ssh-keygen -t rsa -b 4096 -C "automation@prestodb.io" -N "" -f /root/.ssh/id_rsa && \
+    ssh-keygen -t rsa -b 4096 -N "" -f /etc/ssh/ssh_host_rsa_key && \
+    cp /root/.ssh/id_rsa.pub /root/.ssh/authorized_keys && \
+    passwd --unlock root && \
+    # install hadoop
+    HADOOP_BINARY_PATH=https://dlcdn.apache.org/hadoop/common/hadoop-${HADOOP_VERSION}/hadoop-${HADOOP_VERSION}.tar.gz && \
+    curl -fL -o /tmp/hadoop.tar.gz --url $HADOOP_BINARY_PATH && \
+    tar xzf /tmp/hadoop.tar.gz --directory /opt && mv /opt/hadoop-${HADOOP_VERSION} /opt/hadoop && \
+    # install hive
+    HIVE_BINARY_PATH=https://dlcdn.apache.org/hive/hive-$HIVE_VERSION/apache-hive-$HIVE_VERSION-bin.tar.gz && \
+    curl -fL -o /tmp/hive.tar.gz --url $HIVE_BINARY_PATH && \
+    tar xzf /tmp/hive.tar.gz --directory /opt && mv /opt/apache-hive-${HIVE_VERSION}-bin /opt/hive && \
+    # install mysql connector and aws s3 sdk
+    mkdir /opt/hive/auxlib && \
+    curl -fL -o /opt/hive/auxlib/mysql-connector-java-$MYSQL_CONNECTOR_VERSION.jar https://repo1.maven.org/maven2/mysql/mysql-connector-java/$MYSQL_CONNECTOR_VERSION/mysql-connector-java-$MYSQL_CONNECTOR_VERSION.jar && \
+    curl -fL -o /opt/hive/auxlib/aws-java-sdk-core-$AWS_SDK_VERSION.jar https://repo1.maven.org/maven2/com/amazonaws/aws-java-sdk-core/$AWS_SDK_VERSION/aws-java-sdk-core-$AWS_SDK_VERSION.jar && \
+    curl -fL -o /opt/hive/auxlib/aws-java-sdk-s3-$AWS_SDK_VERSION.jar https://repo1.maven.org/maven2/com/amazonaws/aws-java-sdk-s3/$AWS_SDK_VERSION/aws-java-sdk-s3-$AWS_SDK_VERSION.jar && \
+    # copy configurations
+    cp -a /tmp/files/root/* /root && \
+    cp -a /tmp/files/etc/* /etc && \
+    cp -a /tmp/files/opt/* /opt && \
+    chown -R root:root /root && \
+    chmod 0700 /root /root/.ssh && \
+    # setup hadoop and hive
+    /root/setup.sh && \
+    # cleanup
+    yum -q clean all && rm -rf /var/cache/yum && rm -rf /tmp/* /var/tmp/*
+
+# HDFS port
+EXPOSE 9000 9870
+
+# HIVE Metastore port
+EXPOSE 9083 10000
+
+# SOCKS proxy port (ssh -D in supervisord.d/socks-proxy.conf)
+EXPOSE 1180
+
+# Exec form: the entrypoint script is started directly, without a /bin/sh -c wrapper
+CMD ["/root/entrypoint.sh"]
diff --git a/prestodb/hive4.0-hive/files/etc/hadoop-init.d/init-hdfs.sh b/prestodb/hive4.0-hive/files/etc/hadoop-init.d/init-hdfs.sh
new file mode 100755
index 0000000..8279fa9
--- /dev/null
+++ b/prestodb/hive4.0-hive/files/etc/hadoop-init.d/init-hdfs.sh
@@ -0,0 +1,9 @@
+#!/bin/bash -x
+
+# Feed 'N' to the format command's stdin (presumably its re-format confirmation prompt).
+echo 'N' | hdfs namenode -format
+# Rewrite hdfs://localhost in core-site.xml to use this container's hostname.
+sed -i -e "s|hdfs://localhost|hdfs://$(hostname)|g" /opt/hadoop/etc/hadoop/core-site.xml
+# Start a background namenode, create the Hive warehouse directory, then kill all java processes.
+hdfs namenode &
+sleep 10 && hdfs dfs -mkdir -p /user/hive/warehouse && killall java
diff --git a/prestodb/hive4.0-hive/files/etc/hadoop-init.d/set-aws-creds.sh b/prestodb/hive4.0-hive/files/etc/hadoop-init.d/set-aws-creds.sh
new file mode 100755
index 0000000..d420834
--- /dev/null
+++ b/prestodb/hive4.0-hive/files/etc/hadoop-init.d/set-aws-creds.sh
@@ -0,0 +1,10 @@
+#!/bin/bash -ex
+
+# When AWS_ACCESS_KEY_ID is set, replace the quoted placeholder values in hive-site.xml with the real keys.
+if [[ -n "${AWS_ACCESS_KEY_ID}" ]]
+then
+    echo "Setting AWS keys"
+    sed -i -e "s|\"Use AWS_ACCESS_KEY_ID .*\"|${AWS_ACCESS_KEY_ID}|g" \
+        -e "s|\"Use AWS_SECRET_ACCESS_KEY .*\"|${AWS_SECRET_ACCESS_KEY}|g" \
+        /opt/hive/conf/hive-site.xml
+fi
diff --git a/prestodb/hive4.0-hive/files/etc/supervisord.conf b/prestodb/hive4.0-hive/files/etc/supervisord.conf
new file mode 100644
index 0000000..2ac8dba
--- /dev/null
+++ b/prestodb/hive4.0-hive/files/etc/supervisord.conf
@@ -0,0 +1,21 @@
+[supervisord]
+logfile = /var/log/supervisord.log
+logfile_maxbytes = 50MB
+logfile_backups=10
+loglevel = info
+pidfile =
/var/run/supervisord.pid
+nodaemon = true
+directory = /tmp
+strip_ansi = false
+
+[unix_http_server]
+file = /tmp/supervisor.sock
+
+[rpcinterface:supervisor]
+supervisor.rpcinterface_factory = supervisor.rpcinterface:make_main_rpcinterface
+
+[supervisorctl]
+serverurl = unix:///tmp/supervisor.sock
+
+[include]
+files = /etc/supervisord.d/*.conf
diff --git a/prestodb/hive4.0-hive/files/etc/supervisord.d/hdfs-datanode.conf b/prestodb/hive4.0-hive/files/etc/supervisord.d/hdfs-datanode.conf
new file mode 100644
index 0000000..78a80dc
--- /dev/null
+++ b/prestodb/hive4.0-hive/files/etc/supervisord.d/hdfs-datanode.conf
@@ -0,0 +1,8 @@
+[program:hdfs-datanode]
+command=hdfs datanode
+startsecs=2
+stopwaitsecs=10
+user=root
+redirect_stderr=true
+stdout_logfile=/var/log/hadoop-hdfs/hadoop-hdfs-datanode.log
+autostart=true
diff --git a/prestodb/hive4.0-hive/files/etc/supervisord.d/hdfs-namenode.conf b/prestodb/hive4.0-hive/files/etc/supervisord.d/hdfs-namenode.conf
new file mode 100644
index 0000000..f450cdd
--- /dev/null
+++ b/prestodb/hive4.0-hive/files/etc/supervisord.d/hdfs-namenode.conf
@@ -0,0 +1,8 @@
+[program:hdfs-namenode]
+command=hdfs namenode
+startsecs=2
+stopwaitsecs=10
+user=root
+redirect_stderr=true
+stdout_logfile=/var/log/hadoop-hdfs/hadoop-hdfs-namenode.log
+autostart=true
diff --git a/prestodb/hive4.0-hive/files/etc/supervisord.d/hive-metastore.conf b/prestodb/hive4.0-hive/files/etc/supervisord.d/hive-metastore.conf
new file mode 100644
index 0000000..ea98b3c
--- /dev/null
+++ b/prestodb/hive4.0-hive/files/etc/supervisord.d/hive-metastore.conf
@@ -0,0 +1,9 @@
+[program:hive-metastore]
+# Add `--debug:port=5006` for debugging
+command=hive --service metastore
+startsecs=2
+stopwaitsecs=10
+user=root
+redirect_stderr=true
+stdout_logfile=/var/log/hive/hive-metastore.log
+autostart=true
diff --git a/prestodb/hive4.0-hive/files/etc/supervisord.d/hive-server2.conf b/prestodb/hive4.0-hive/files/etc/supervisord.d/hive-server2.conf
new file mode 100644
index
0000000..578507e
--- /dev/null
+++ b/prestodb/hive4.0-hive/files/etc/supervisord.d/hive-server2.conf
@@ -0,0 +1,8 @@
+[program:hive-server2]
+command=hive --service hiveserver2
+startsecs=2
+stopwaitsecs=10
+user=root
+redirect_stderr=true
+stdout_logfile=/var/log/hive/hive-server2.log
+autostart=true
diff --git a/prestodb/hive4.0-hive/files/etc/supervisord.d/mysql-metastore.conf b/prestodb/hive4.0-hive/files/etc/supervisord.d/mysql-metastore.conf
new file mode 100644
index 0000000..e95544e
--- /dev/null
+++ b/prestodb/hive4.0-hive/files/etc/supervisord.d/mysql-metastore.conf
@@ -0,0 +1,8 @@
+[program:mysql-metastore]
+command=/usr/bin/pidproxy /var/run/mysqld/mysqld.pid /usr/bin/mysqld_safe
+startsecs=2
+stopwaitsecs=10
+user=mysql
+redirect_stderr=true
+stdout_logfile=/var/log/mysql/mysql.log
+autostart=true
diff --git a/prestodb/hive4.0-hive/files/etc/supervisord.d/socks-proxy.conf b/prestodb/hive4.0-hive/files/etc/supervisord.d/socks-proxy.conf
new file mode 100644
index 0000000..43602d2
--- /dev/null
+++ b/prestodb/hive4.0-hive/files/etc/supervisord.d/socks-proxy.conf
@@ -0,0 +1,9 @@
+[program:socks-proxy]
+command=/usr/bin/ssh -o StrictHostKeyChecking=no -v -N -D 0.0.0.0:1180 localhost
+startsecs=2
+stopwaitsecs=10
+startretries=30
+user=root
+redirect_stderr=true
+stdout_logfile=/var/log/socks-proxy
+autostart=true
diff --git a/prestodb/hive4.0-hive/files/etc/supervisord.d/sshd.conf b/prestodb/hive4.0-hive/files/etc/supervisord.d/sshd.conf
new file mode 100644
index 0000000..d0fe128
--- /dev/null
+++ b/prestodb/hive4.0-hive/files/etc/supervisord.d/sshd.conf
@@ -0,0 +1,9 @@
+[program:sshd]
+command=/usr/sbin/sshd -D
+startsecs=2
+stopwaitsecs=10
+startretries=30
+user=root
+redirect_stderr=true
+stdout_logfile=/var/log/sshd
+autostart=true
diff --git a/prestodb/hive4.0-hive/files/opt/hadoop/etc/hadoop/core-site.xml b/prestodb/hive4.0-hive/files/opt/hadoop/etc/hadoop/core-site.xml
new file mode 100644
index 0000000..96cca1d
--- /dev/null
+++
b/prestodb/hive4.0-hive/files/opt/hadoop/etc/hadoop/core-site.xml
@@ -0,0 +1,21 @@
+ + + + + + + fs.defaultFS + hdfs://localhost:9000 + + + + hadoop.proxyuser.root.hosts + * + + + + hadoop.proxyuser.root.groups + * + + +
diff --git a/prestodb/hive4.0-hive/files/opt/hadoop/etc/hadoop/hadoop-env.sh b/prestodb/hive4.0-hive/files/opt/hadoop/etc/hadoop/hadoop-env.sh
new file mode 100644
index 0000000..37be409
--- /dev/null
+++ b/prestodb/hive4.0-hive/files/opt/hadoop/etc/hadoop/hadoop-env.sh
@@ -0,0 +1 @@
+export HADOOP_HEAPSIZE=256
diff --git a/prestodb/hive4.0-hive/files/opt/hadoop/etc/hadoop/hdfs-site.xml b/prestodb/hive4.0-hive/files/opt/hadoop/etc/hadoop/hdfs-site.xml
new file mode 100644
index 0000000..18526b2
--- /dev/null
+++ b/prestodb/hive4.0-hive/files/opt/hadoop/etc/hadoop/hdfs-site.xml
@@ -0,0 +1,26 @@
+ + + + + + + + + + dfs.permissions.enabled + false + + +
diff --git a/prestodb/hive4.0-hive/files/opt/hive/conf/hive-env.sh b/prestodb/hive4.0-hive/files/opt/hive/conf/hive-env.sh
new file mode 100644
index 0000000..37a8a3b
--- /dev/null
+++ b/prestodb/hive4.0-hive/files/opt/hive/conf/hive-env.sh
@@ -0,0 +1,2 @@
+export HADOOP_CLIENT_OPTS="${HADOOP_CLIENT_OPTS} -Xmx256m -Djava.io.tmpdir=/tmp"
+export HADOOP_HEAPSIZE=256
diff --git a/prestodb/hive4.0-hive/files/opt/hive/conf/hive-site.xml b/prestodb/hive4.0-hive/files/opt/hive/conf/hive-site.xml
new file mode 100644
index 0000000..a23a838
--- /dev/null
+++ b/prestodb/hive4.0-hive/files/opt/hive/conf/hive-site.xml
@@ -0,0 +1,86 @@
+ + + + + + hive.metastore.uris + thrift://localhost:9083 + + + + javax.jdo.option.ConnectionURL + jdbc:mysql://localhost:3306/metastore?useSSL=false + + + + javax.jdo.option.ConnectionDriverName + com.mysql.cj.jdbc.Driver + + + + javax.jdo.option.ConnectionUserName + root + + + + javax.jdo.option.ConnectionPassword + root + + + + hive.metastore.connect.retries + 15 + + + + hive.metastore.disallow.incompatible.col.type.changes + false + + + + + metastore.storage.schema.reader.impl
org.apache.hadoop.hive.metastore.SerDeStorageSchemaReader + + + + hive.support.concurrency + true + + + + hive.txn.manager + org.apache.hadoop.hive.ql.lockmgr.DbTxnManager + + + + hive.compactor.initiator.on + true + + + + hive.compactor.worker.threads + 1 + + + + fs.s3.awsAccessKeyId + "Use AWS_ACCESS_KEY_ID environment variable to set this value" + + + + fs.s3.awsSecretAccessKey + "Use AWS_SECRET_ACCESS_KEY environment variable to set this value" + + + + fs.s3a.access.key + "Use AWS_ACCESS_KEY_ID environment variable to set this value" + + + + fs.s3a.secret.key + "Use AWS_SECRET_ACCESS_KEY environment variable to set this value" + + +
diff --git a/prestodb/hive4.0-hive/files/root/entrypoint.sh b/prestodb/hive4.0-hive/files/root/entrypoint.sh
new file mode 100755
index 0000000..aa6f587
--- /dev/null
+++ b/prestodb/hive4.0-hive/files/root/entrypoint.sh
@@ -0,0 +1,9 @@
+#!/bin/bash -ex
+
+# Run every init script found in /etc/hadoop-init.d before starting supervisord.
+for init_script in /etc/hadoop-init.d/*; do
+    "${init_script}"
+done
+
+# exec replaces this shell with supervisord so it receives the container's stop signals directly.
+exec supervisord -c /etc/supervisord.conf
diff --git a/prestodb/hive4.0-hive/files/root/setup.sh b/prestodb/hive4.0-hive/files/root/setup.sh
new file mode 100755
index 0000000..58a8928
--- /dev/null
+++ b/prestodb/hive4.0-hive/files/root/setup.sh
@@ -0,0 +1,27 @@
+#!/bin/bash -ex
+
+# Build-time setup run from the Dockerfile: initialise MariaDB and create the Hive metastore schema.
+
+ln -s /usr/bin/resolveip /usr/libexec # mariadb-server installs resolveip in /usr/bin but mysql_install_db expects it in /usr/libexec
+mkdir /var/log/mysql /var/log/hive /var/log/hadoop-hdfs
+
+mysql_install_db
+
+chown -R mysql:mysql /var/lib/mysql
+
+# Start a temporary server in the background and give it time to come up.
+/usr/bin/mysqld_safe &
+sleep 10s
+
+echo "GRANT ALL PRIVILEGES ON *.* TO 'root'@'%' WITH GRANT OPTION; FLUSH PRIVILEGES;" | mysql
+echo "CREATE DATABASE metastore;" | mysql
+/usr/bin/mysqladmin -u root password 'root'
+/opt/hive/bin/schematool -dbType mysql -initSchema
+
+# Stop the temporary server and give it time to shut down before fixing log ownership.
+killall mysqld
+sleep 10s
+chown -R mysql:mysql /var/log/mysql/
+rm -rf /tmp/* /var/tmp/*
+
+adduser hive