Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions etc/compose/hive4.0-hive/docker-compose.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Single-container Hadoop/Hive 4.0 test environment.
# The service name and hostname are both "hadoop-master".
services:
  hadoop-master:
    hostname: hadoop-master
    image: prestodb/hive4.0-hive:latest

73 changes: 73 additions & 0 deletions prestodb/hive4.0-hive/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# NOTE(review): the base image is tracked by the floating :latest tag, so
# rebuilds are not reproducible; pin a versioned tag or digest if one exists.
FROM prestodb/centos7-oj8:latest
LABEL maintainer="Presto community <https://prestodb.io/community.html>"

# Component versions; each can be overridden with --build-arg.
ARG HADOOP_VERSION=3.4.1
ARG HIVE_VERSION=4.0.1
ARG MYSQL_CONNECTOR_VERSION=8.0.24
ARG AWS_SDK_VERSION=1.12.782
# Base URL the Hadoop and Hive tarballs are downloaded from. Override it with
# --build-arg when the pinned versions are not served by the default host.
ARG APACHE_MIRROR=https://dlcdn.apache.org

ENV HADOOP_HOME=/opt/hadoop
ENV HIVE_HOME=/opt/hive
ENV HADOOP_CLASSPATH=${HADOOP_HOME}/share/hadoop/tools/lib/*
ENV PATH=${HIVE_HOME}/bin:${HADOOP_HOME}/bin:${PATH}

# Copy configuration files
COPY ./files /tmp/files/

# Install packages, Hadoop, Hive and the auxiliary jars, apply the copied
# configuration and run the one-time setup script, all in a single layer so
# that the files removed by the final cleanup step are not kept in the image.
# Every curl call uses --fail so an HTTP error aborts the build right away
# instead of saving an error page that only breaks a later step.
RUN yum install -y \
        mariadb-server \
        openssh \
        openssh-clients \
        openssh-server \
        psmisc \
        which && \
    # setup ssh server for sock proxy
    ssh-keygen -t rsa -b 4096 -C "[email protected]" -N "" -f /root/.ssh/id_rsa && \
    ssh-keygen -t rsa -b 4096 -N "" -f /etc/ssh/ssh_host_rsa_key && \
    cp /root/.ssh/id_rsa.pub /root/.ssh/authorized_keys && \
    passwd --unlock root && \
    # install hadoop
    HADOOP_BINARY_PATH=${APACHE_MIRROR}/hadoop/common/hadoop-${HADOOP_VERSION}/hadoop-${HADOOP_VERSION}.tar.gz && \
    curl --fail --location --silent --show-error -o /tmp/hadoop.tar.gz --url "${HADOOP_BINARY_PATH}" && \
    tar xzf /tmp/hadoop.tar.gz --directory /opt && mv /opt/hadoop-${HADOOP_VERSION} /opt/hadoop && \
    # install hive
    HIVE_BINARY_PATH=${APACHE_MIRROR}/hive/hive-${HIVE_VERSION}/apache-hive-${HIVE_VERSION}-bin.tar.gz && \
    curl --fail --location --silent --show-error -o /tmp/hive.tar.gz --url "${HIVE_BINARY_PATH}" && \
    tar xzf /tmp/hive.tar.gz --directory /opt && mv /opt/apache-hive-${HIVE_VERSION}-bin /opt/hive && \
    # install mysql connector and aws s3 sdk
    mkdir /opt/hive/auxlib && \
    curl --fail --location --silent --show-error \
        -o /opt/hive/auxlib/mysql-connector-java-${MYSQL_CONNECTOR_VERSION}.jar \
        https://repo1.maven.org/maven2/mysql/mysql-connector-java/${MYSQL_CONNECTOR_VERSION}/mysql-connector-java-${MYSQL_CONNECTOR_VERSION}.jar && \
    curl --fail --location --silent --show-error \
        -o /opt/hive/auxlib/aws-java-sdk-core-${AWS_SDK_VERSION}.jar \
        https://repo1.maven.org/maven2/com/amazonaws/aws-java-sdk-core/${AWS_SDK_VERSION}/aws-java-sdk-core-${AWS_SDK_VERSION}.jar && \
    curl --fail --location --silent --show-error \
        -o /opt/hive/auxlib/aws-java-sdk-s3-${AWS_SDK_VERSION}.jar \
        https://repo1.maven.org/maven2/com/amazonaws/aws-java-sdk-s3/${AWS_SDK_VERSION}/aws-java-sdk-s3-${AWS_SDK_VERSION}.jar && \
    # copy configurations
    cp -a /tmp/files/root/* /root && \
    cp -a /tmp/files/etc/* /etc && \
    cp -a /tmp/files/opt/* /opt && \
    chown -R root:root /root && \
    chmod 0700 /root /root/.ssh && \
    # setup hadoop and hive
    /root/setup.sh && \
    # cleanup
    yum -q clean all && rm -rf /var/cache/yum && rm -rf /tmp/* /var/tmp/*

# HDFS ports: 9000 is the fs.defaultFS port from core-site.xml; 9870 is
# presumably the NameNode web UI (TODO confirm).
EXPOSE 9000 9870

# Hive ports: 9083 is the metastore thrift port from hive-site.xml; 10000 is
# presumably HiveServer2 (TODO confirm).
EXPOSE 9083 10000

# SOCKS proxy opened by the socks-proxy supervisord program.
EXPOSE 1180

# Exec form: the entrypoint script is started directly, without a wrapping
# "/bin/sh -c".
CMD ["/root/entrypoint.sh"]
6 changes: 6 additions & 0 deletions prestodb/hive4.0-hive/files/etc/hadoop-init.d/init-hdfs.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
#!/bin/bash -x
# Container-start init script: prepares HDFS before supervisord starts the
# long-running daemons.

# Format the NameNode storage; the piped 'N' answers any interactive prompt
# the format command raises (presumably the re-format confirmation, so an
# already formatted store is left alone).
echo 'N' | hdfs namenode -format
# Point fs.defaultFS at this container's hostname instead of localhost.
sed -i -e "s|hdfs://localhost|hdfs://$(hostname)|g" /opt/hadoop/etc/hadoop/core-site.xml
# Start a temporary NameNode in the background, give it 10 seconds to come up,
# create the Hive warehouse directory, then stop every java process so that
# supervisord can start the NameNode itself afterwards.
hdfs namenode &
sleep 10 && hdfs dfs -mkdir -p /user/hive/warehouse && killall java
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
#!/bin/bash -ex
# Container-start init script: when AWS_ACCESS_KEY_ID is set, replaces the
# quoted "Use AWS_... " placeholder values in hive-site.xml with the
# AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY values from the environment.

if [[ -n "${AWS_ACCESS_KEY_ID}" ]]
then
    echo "Setting AWS keys"
    # The script runs with -x; switch tracing off around the substitution so
    # the expanded credentials are not written to the container log.
    set +x
    sed -i -e "s|\"Use AWS_ACCESS_KEY_ID .*\"|${AWS_ACCESS_KEY_ID}|g" \
        -e "s|\"Use AWS_SECRET_ACCESS_KEY .*\"|${AWS_SECRET_ACCESS_KEY}|g" \
        /opt/hive/conf/hive-site.xml
    set -x
fi
21 changes: 21 additions & 0 deletions prestodb/hive4.0-hive/files/etc/supervisord.conf
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# Main supervisord configuration. The individual programs are defined in the
# files pulled in by the [include] section at the bottom.
[supervisord]
logfile = /var/log/supervisord.log
logfile_maxbytes = 50MB
logfile_backups=10
loglevel = info
pidfile = /var/run/supervisord.pid
# Stay in the foreground instead of daemonizing.
nodaemon = true
directory = /tmp
strip_ansi = false

# Control socket; the path must match serverurl in [supervisorctl] below.
[unix_http_server]
file = /tmp/supervisor.sock

[rpcinterface:supervisor]
supervisor.rpcinterface_factory = supervisor.rpcinterface:make_main_rpcinterface

[supervisorctl]
serverurl = unix:///tmp/supervisor.sock

# One file per managed program.
[include]
files = /etc/supervisord.d/*.conf
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# HDFS DataNode, run as root and started automatically with supervisord.
# stderr is merged into the stdout log file.
[program:hdfs-datanode]
command=hdfs datanode
startsecs=2
stopwaitsecs=10
user=root
redirect_stderr=true
stdout_logfile=/var/log/hadoop-hdfs/hadoop-hdfs-datanode.log
autostart=true
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# HDFS NameNode, run as root and started automatically with supervisord.
# stderr is merged into the stdout log file.
[program:hdfs-namenode]
command=hdfs namenode
startsecs=2
stopwaitsecs=10
user=root
redirect_stderr=true
stdout_logfile=/var/log/hadoop-hdfs/hadoop-hdfs-namenode.log
autostart=true
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# Hive metastore service, run as root and started automatically with
# supervisord. stderr is merged into the stdout log file.
[program:hive-metastore]
# Add `--debug:port=5006` for debugging
command=hive --service metastore
startsecs=2
stopwaitsecs=10
user=root
redirect_stderr=true
stdout_logfile=/var/log/hive/hive-metastore.log
autostart=true
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# HiveServer2, run as root and started automatically with supervisord.
# stderr is merged into the stdout log file.
[program:hive-server2]
command=hive --service hiveserver2
startsecs=2
stopwaitsecs=10
user=root
redirect_stderr=true
stdout_logfile=/var/log/hive/hive-server2.log
autostart=true
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Database server backing the Hive metastore, run as the mysql user.
# mysqld_safe is launched through pidproxy with the mysqld pid file, presumably
# so that stop signals reach the real mysqld process (TODO confirm).
[program:mysql-metastore]
command=/usr/bin/pidproxy /var/run/mysqld/mysqld.pid /usr/bin/mysqld_safe
startsecs=2
stopwaitsecs=10
user=mysql
redirect_stderr=true
stdout_logfile=/var/log/mysql/mysql.log
autostart=true
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# SOCKS proxy on 0.0.0.0:1180, implemented as an ssh dynamic forward (-D) to
# localhost with no remote command (-N). It needs the sshd program to be up,
# hence the generous startretries.
[program:socks-proxy]
command=/usr/bin/ssh -o StrictHostKeyChecking=no -v -N -D 0.0.0.0:1180 localhost
startsecs=2
stopwaitsecs=10
startretries=30
user=root
redirect_stderr=true
stdout_logfile=/var/log/socks-proxy
autostart=true
9 changes: 9 additions & 0 deletions prestodb/hive4.0-hive/files/etc/supervisord.d/sshd.conf
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# OpenSSH server kept in the foreground (-D) so supervisord can manage it.
# The socks-proxy program connects to it on localhost.
[program:sshd]
command=/usr/sbin/sshd -D
startsecs=2
stopwaitsecs=10
startretries=30
user=root
redirect_stderr=true
stdout_logfile=/var/log/sshd
autostart=true
21 changes: 21 additions & 0 deletions prestodb/hive4.0-hive/files/opt/hadoop/etc/hadoop/core-site.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!-- Core Hadoop settings for the single-node test image. -->

<configuration>

<!-- Default filesystem URI. The init-hdfs.sh startup script rewrites the
     localhost authority below to the container hostname. -->
<property>
<name>fs.defaultFS</name>
<value>hdfs://localhost:9000</value>
</property>

<!-- Allow the root user to act as a proxy user from any host. -->
<property>
<name>hadoop.proxyuser.root.hosts</name>
<value>*</value>
</property>

<!-- Allow the root user to act as a proxy user for members of any group. -->
<property>
<name>hadoop.proxyuser.root.groups</name>
<value>*</value>
</property>

</configuration>
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
# Keep the Hadoop daemons' JVM heap small for this test image.
# NOTE(review): the value is presumably in megabytes, and recent Hadoop
# releases may prefer HADOOP_HEAPSIZE_MAX; confirm against the Hadoop docs.
export HADOOP_HEAPSIZE=256
26 changes: 26 additions & 0 deletions prestodb/hive4.0-hive/files/opt/hadoop/etc/hadoop/hdfs-site.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. See accompanying LICENSE file.
-->

<!-- Put site-specific property overrides in this file. -->

<configuration>

<!-- Turn off HDFS permission checking so that any user can read and write;
     acceptable only because this image is a test environment. -->
<property>
<name>dfs.permissions.enabled</name>
<value>false</value>
</property>

</configuration>
2 changes: 2 additions & 0 deletions prestodb/hive4.0-hive/files/opt/hive/conf/hive-env.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# Environment for the Hive launcher scripts.
# Append a 256 MB max heap and an explicit temp directory to whatever client
# JVM options are already set.
export HADOOP_CLIENT_OPTS="${HADOOP_CLIENT_OPTS} -Xmx256m -Djava.io.tmpdir=/tmp"
# Heap size for the launched JVMs (presumably megabytes; TODO confirm).
export HADOOP_HEAPSIZE=256
86 changes: 86 additions & 0 deletions prestodb/hive4.0-hive/files/opt/hive/conf/hive-site.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!-- Hive configuration for the single-node test image: a local thrift
     metastore backed by the local MySQL-compatible database, ACID
     transaction support, and placeholder S3 credentials. -->
<configuration>

<!-- Metastore thrift endpoint served by the hive-metastore program. -->
<property>
<name>hive.metastore.uris</name>
<value>thrift://localhost:9083</value>
</property>

<!-- JDBC connection to the metastore database created by setup.sh. -->
<property>
<name>javax.jdo.option.ConnectionURL</name>
<value>jdbc:mysql://localhost:3306/metastore?useSSL=false</value>
</property>

<!-- Driver class provided by the mysql-connector-java jar in /opt/hive/auxlib. -->
<property>
<name>javax.jdo.option.ConnectionDriverName</name>
<value>com.mysql.cj.jdbc.Driver</value>
</property>

<!-- Database credentials; they match the root password set in setup.sh. -->
<property>
<name>javax.jdo.option.ConnectionUserName</name>
<value>root</value>
</property>

<property>
<name>javax.jdo.option.ConnectionPassword</name>
<value>root</value>
</property>

<property>
<name>hive.metastore.connect.retries</name>
<value>15</value>
</property>

<property>
<name>hive.metastore.disallow.incompatible.col.type.changes</name>
<value>false</value>
</property>

<property>
<!-- https://community.hortonworks.com/content/supportkb/247055/errorjavalangunsupportedoperationexception-storage.html -->
<name>metastore.storage.schema.reader.impl</name>
<value>org.apache.hadoop.hive.metastore.SerDeStorageSchemaReader</value>
</property>

<!-- Transaction support: concurrency, the DB transaction manager and the
     compactor (initiator plus one worker thread) are enabled together. -->
<property>
<name>hive.support.concurrency</name>
<value>true</value>
</property>

<property>
<name>hive.txn.manager</name>
<value>org.apache.hadoop.hive.ql.lockmgr.DbTxnManager</value>
</property>

<property>
<name>hive.compactor.initiator.on</name>
<value>true</value>
</property>

<property>
<name>hive.compactor.worker.threads</name>
<value>1</value>
</property>

<!-- S3 credentials. The quoted placeholder values below are matched by a
     startup script and replaced with the values of the corresponding
     environment variables, so their text must not be changed. -->
<property>
<name>fs.s3.awsAccessKeyId</name>
<value>"Use AWS_ACCESS_KEY_ID environment variable to set this value"</value>
</property>

<property>
<name>fs.s3.awsSecretAccessKey</name>
<value>"Use AWS_SECRET_ACCESS_KEY environment variable to set this value"</value>
</property>

<property>
<name>fs.s3a.access.key</name>
<value>"Use AWS_ACCESS_KEY_ID environment variable to set this value"</value>
</property>

<property>
<name>fs.s3a.secret.key</name>
<value>"Use AWS_SECRET_ACCESS_KEY environment variable to set this value"</value>
</property>

</configuration>
7 changes: 7 additions & 0 deletions prestodb/hive4.0-hive/files/root/entrypoint.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
#!/bin/bash -ex
# Container entrypoint: runs every init script in /etc/hadoop-init.d in glob
# order, then hands the process over to supervisord.

for init_script in /etc/hadoop-init.d/*; do
    # With no match the glob stays a literal pattern; skip it instead of
    # trying to execute a path that does not exist.
    [[ -e "${init_script}" ]] || continue
    "${init_script}"
done

# exec replaces this shell, so supervisord itself receives the container's
# stop signal rather than an intermediate bash process.
exec supervisord -c /etc/supervisord.conf
23 changes: 23 additions & 0 deletions prestodb/hive4.0-hive/files/root/setup.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
#!/bin/bash -ex
# Build-time setup, run once from the Dockerfile: initializes the metastore
# database, loads the Hive schema into it and creates the log directories and
# the hive user.

ln -s /usr/bin/resolveip /usr/libexec # mariadb-server installs resolveip in /usr/bin but mysql_install_db expects it in /usr/libexec
mkdir /var/log/mysql /var/log/hive /var/log/hadoop-hdfs

mysql_install_db

chown -R mysql:mysql /var/lib/mysql

# Start a temporary database server in the background and give it time to
# accept connections.
/usr/bin/mysqld_safe &
sleep 10s

# Let root connect from any host, create the metastore database, then set the
# root password to the value hive-site.xml uses.
echo "GRANT ALL PRIVILEGES ON *.* TO 'root'@'%' WITH GRANT OPTION; FLUSH PRIVILEGES;" | mysql
echo "CREATE DATABASE metastore;" | mysql
/usr/bin/mysqladmin -u root password 'root'
/opt/hive/bin/schematool -dbType mysql -initSchema

# Stop the temporary server and wait for it to exit before finishing the
# image layer.
killall mysqld
sleep 10s
chown -R mysql:mysql /var/log/mysql/
rm -rf /tmp/* /var/tmp/*

adduser hive