Skip to content

Commit 38c537c

Browse files
deniskuzZ (Denys Kuzmenko)
authored and committed
Provide docker image for HMS
1 parent 413069e commit 38c537c

File tree

11 files changed

+492
-5
lines changed

11 files changed

+492
-5
lines changed

packaging/src/docker/conf/hive-site.xml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,7 @@
6161
<value>/opt/hive/data/warehouse</value>
6262
</property>
6363
<property>
64-
<name>metastore.metastore.event.db.notification.api.auth</name>
64+
<name>hive.metastore.event.db.notification.api.auth</name>
6565
<value>false</value>
6666
</property>
6767
</configuration>

standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/conf/MetastoreConf.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -875,7 +875,7 @@ public enum ConfVars {
875875
EVENT_DB_LISTENER_CLEAN_STARTUP_WAIT_INTERVAL("metastore.event.db.listener.clean.startup.wait.interval",
876876
"hive.metastore.event.db.listener.clean.startup.wait.interval", 1, TimeUnit.DAYS,
877877
"Wait interval post start of metastore after which the cleaner thread starts to work"),
878-
EVENT_DB_NOTIFICATION_API_AUTH("metastore.metastore.event.db.notification.api.auth",
878+
EVENT_DB_NOTIFICATION_API_AUTH("metastore.event.db.notification.api.auth",
879879
"hive.metastore.event.db.notification.api.auth", true,
880880
"Should metastore do authorization against database notification related APIs such as get_next_notification.\n" +
881881
"If set to true, then only the superusers in proxy settings have the permission"),

standalone-metastore/metastore-server/pom.xml

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -491,6 +491,39 @@
491491
</plugins>
492492
</reporting>
493493
</profile>
494+
<profile>
495+
<id>dist</id>
496+
</profile>
497+
<profile>
498+
<id>docker</id>
499+
<build>
500+
<plugins>
501+
<plugin>
502+
<groupId>org.codehaus.mojo</groupId>
503+
<artifactId>exec-maven-plugin</artifactId>
504+
<executions>
505+
<execution>
506+
<id>build-docker-image</id>
507+
<phase>install</phase>
508+
<goals>
509+
<goal>exec</goal>
510+
</goals>
511+
<configuration>
512+
<executable>bash</executable>
513+
<environmentVariables>
514+
<SOURCE_DIR>${standalone.metastore.path.to.root}</SOURCE_DIR>
515+
</environmentVariables>
516+
<arguments>
517+
<argument>src/docker/build.sh</argument>
518+
<argument>-hadoop ${hadoop.version}</argument>
519+
</arguments>
520+
</configuration>
521+
</execution>
522+
</executions>
523+
</plugin>
524+
</plugins>
525+
</build>
526+
</profile>
494527
<!--
495528
<profile>
496529
<id>checkin</id>

standalone-metastore/metastore-server/src/assembly/bin.xml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,9 +18,9 @@
1818
-->
1919

2020
<assembly
21-
xmlns="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.2"
21+
xmlns="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/2.0.0"
2222
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
23-
xsi:schemaLocation="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.2 http://maven.apache.org/xsd/assembly-1.1.2.xsd">
23+
xsi:schemaLocation="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/2.0.0 http://maven.apache.org/xsd/assembly-2.0.0.xsd">
2424

2525
<id>bin</id>
2626

Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,85 @@
1+
#
2+
# Licensed to the Apache Software Foundation (ASF) under one or more
3+
# contributor license agreements. See the NOTICE file distributed with
4+
# this work for additional information regarding copyright ownership.
5+
# The ASF licenses this file to You under the Apache License, Version 2.0
6+
# (the "License"); you may not use this file except in compliance with
7+
# the License. You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing, software
12+
# distributed under the License is distributed on an "AS IS" BASIS,
13+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
# See the License for the specific language governing permissions and
15+
# limitations under the License.
16+
#
17+
ARG BUILD_ENV
18+
19+
FROM ubuntu as unarchive
20+
ONBUILD COPY hadoop-*.tar.gz /opt
21+
ONBUILD COPY apache-hive-standalone-metastore-server-*-bin.tar.gz /opt
22+
23+
FROM ubuntu as archive
24+
ARG HADOOP_VERSION
25+
ARG HIVE_VERSION
26+
ONBUILD RUN apt-get update && apt-get -y install wget
27+
ONBUILD RUN wget https://archive.apache.org/dist/hadoop/core/hadoop-$HADOOP_VERSION/hadoop-$HADOOP_VERSION.tar.gz && \
28+
wget https://archive.apache.org/dist/hive/hive-standalone-metastore-$HIVE_VERSION/apache-hive-standalone-metastore-server-$HIVE_VERSION-bin.tar.gz
29+
ONBUILD RUN mv hadoop-$HADOOP_VERSION.tar.gz /opt && \
30+
mv apache-hive-standalone-metastore-server-$HIVE_VERSION-bin.tar.gz /opt
31+
32+
FROM ubuntu as buildarchive
33+
ARG HADOOP_VERSION
34+
ARG HIVE_VERSION
35+
ONBUILD RUN apt-get update && apt-get -y install wget
36+
ONBUILD RUN wget https://archive.apache.org/dist/hadoop/core/hadoop-$HADOOP_VERSION/hadoop-$HADOOP_VERSION.tar.gz
37+
ONBUILD COPY ./apache-hive-standalone-metastore-server-$HIVE_VERSION-bin.tar.gz /opt
38+
ONBUILD RUN mv hadoop-$HADOOP_VERSION.tar.gz /opt
39+
40+
FROM ${BUILD_ENV} as env
41+
RUN echo ${BUILD_ENV}
42+
ARG HADOOP_VERSION
43+
ARG HIVE_VERSION
44+
45+
RUN tar -xzvf /opt/hadoop-$HADOOP_VERSION.tar.gz -C /opt/ && \
46+
rm -rf /opt/hadoop-$HADOOP_VERSION/share/doc/* && \
47+
tar -xzvf /opt/apache-hive-standalone-metastore-server-$HIVE_VERSION-bin.tar.gz -C /opt/
48+
49+
FROM eclipse-temurin:17.0.12_7-jre-ubi9-minimal AS run
50+
51+
ARG HADOOP_VERSION
52+
ARG HIVE_VERSION
53+
COPY --from=env /opt/hadoop-$HADOOP_VERSION /opt/hadoop
54+
COPY --from=env /opt/apache-hive-metastore-$HIVE_VERSION-bin /opt/hive
55+
56+
# Install dependencies
57+
RUN set -ex; \
58+
microdnf update -y; \
59+
microdnf -y install procps; \
60+
rm -rf /var/lib/apt/lists/*
61+
62+
# Set necessary environment variables.
63+
ENV HADOOP_HOME=/opt/hadoop \
64+
HIVE_HOME=/opt/hive \
65+
HIVE_VER=$HIVE_VERSION
66+
67+
ENV PATH=$HIVE_HOME/bin:$HADOOP_HOME/bin:$PATH
68+
69+
COPY entrypoint.sh /
70+
COPY conf $HIVE_HOME/conf
71+
RUN chmod +x /entrypoint.sh
72+
73+
74+
ARG UID=1000
75+
RUN useradd --no-create-home -s /sbin/nologin -c "" --uid $UID hive && \
76+
chown hive /opt/hive && \
77+
chown hive /opt/hadoop && \
78+
chown hive /opt/hive/conf && \
79+
mkdir -p /opt/hive/data/warehouse && \
80+
chown hive /opt/hive/data/warehouse
81+
82+
USER hive
83+
WORKDIR /opt/hive
84+
EXPOSE 9083
85+
ENTRYPOINT ["sh", "-c", "/entrypoint.sh"]
Lines changed: 117 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,117 @@
1+
<!--
2+
{% comment %}
3+
Licensed to the Apache Software Foundation (ASF) under one or more
4+
contributor license agreements. See the NOTICE file distributed with
5+
this work for additional information regarding copyright ownership.
6+
The ASF licenses this file to you under the Apache License, Version 2.0
7+
(the "License"); you may not use this file except in compliance with
8+
the License. You may obtain a copy of the License at
9+
10+
http://www.apache.org/licenses/LICENSE-2.0
11+
12+
Unless required by applicable law or agreed to in writing, software
13+
distributed under the License is distributed on an "AS IS" BASIS,
14+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
See the License for the specific language governing permissions and
16+
limitations under the License.
17+
{% endcomment %}
18+
-->
19+
### Introduction
20+
21+
---
22+
Run Apache Hive Metastore inside docker container
23+
- Quick-start for Hive Metastore
24+
25+
26+
## Quickstart
27+
### STEP 1: Pull the image
28+
- Pull the image from DockerHub: https://hub.docker.com/r/apache/hive-metastore/tags.
29+
30+
Here are the latest images:
31+
- 4.0.0
32+
- 4.0.0-beta-1
33+
- 3.1.3
34+
35+
```shell
36+
docker pull apache/hive-metastore:4.0.0
37+
```
38+
### STEP 2: Export the Hive version
39+
```shell
40+
export HIVE_VERSION=4.0.0
41+
```
42+
43+
### STEP 3: Launch the Standalone Metastore backed by Derby
44+
```shell
45+
docker run -d -p 9083:9083 --name metastore-standalone apache/hive-metastore:${HIVE_VERSION}
46+
```
47+
48+
### Detailed Setup
49+
---
50+
#### Build image
51+
Apache Hive Metastore relies on Hadoop and some others to facilitate managing metadata of large datasets.
52+
The `build.sh` provides ways to build the image against specified version of the dependent, as well as build from source.
53+
54+
##### Build from source
55+
```shell
56+
mvn clean package -pl metastore-server -DskipTests -Pdocker
57+
```
58+
##### Build with specified version
59+
There are some arguments to specify the component version:
60+
```shell
61+
-hadoop <hadoop version>
62+
-hive <hive version>
63+
```
64+
If the version is not provided, it will read the version from current `pom.xml`:
65+
`project.version`, `hadoop.version` for Hive and Hadoop respectively.
66+
67+
For example, the following command uses Hive 4.0.0 and Hadoop `hadoop.version` to build the image,
68+
```shell
69+
./build.sh -hive 4.0.0
70+
```
71+
If the command does not specify the Hive version, it will use the local `apache-hive-standalone-metastore-server-${project.version}-bin.tar.gz` (a build is triggered if it doesn't exist),
72+
together with Hadoop 3.1.0 to build the image,
73+
```shell
74+
./build.sh -hadoop 3.1.0
75+
```
76+
After building successfully, we can get a Docker image named `apache/hive-metastore` by default, the image is tagged by the provided Hive version.
77+
78+
#### Run services
79+
80+
Before going further, we should define the environment variable `HIVE_VERSION` first.
81+
For example, if `-hive 4.0.0` is specified to build the image,
82+
```shell
83+
export HIVE_VERSION=4.0.0
84+
```
85+
or assuming that you're relying on current `project.version` from pom.xml,
86+
```shell
87+
export HIVE_VERSION=$(mvn -f pom.xml -q help:evaluate -Dexpression=project.version -DforceStdout)
88+
```
89+
- Metastore
90+
91+
For a quick start, launch the Metastore with Derby,
92+
```shell
93+
docker run -d -p 9083:9083 --name metastore-standalone apache/hive-metastore:${HIVE_VERSION}
94+
```
95+
Everything would be lost when the service is down. In order to save the Hive table's schema and data, start the container with an external Postgres and Volume to keep them,
96+
97+
```shell
98+
docker run -d -p 9083:9083 --env DB_DRIVER=postgres \
99+
--env SERVICE_OPTS="-Djavax.jdo.option.ConnectionDriverName=org.postgresql.Driver -Djavax.jdo.option.ConnectionURL=jdbc:postgresql://postgres:5432/metastore_db -Djavax.jdo.option.ConnectionUserName=hive -Djavax.jdo.option.ConnectionPassword=password" \
100+
--mount source=warehouse,target=/opt/hive/data/warehouse \
101+
--mount type=bind,source=`mvn help:evaluate -Dexpression=settings.localRepository -q -DforceStdout`/org/postgresql/postgresql/42.7.3/postgresql-42.7.3.jar,target=/opt/hive/lib/postgres.jar \
102+
    --name metastore-standalone apache/hive-metastore:${HIVE_VERSION}
103+
```
104+
105+
If you want to use your own `hdfs-site.xml` or `yarn-site.xml` for the service, you can provide the environment variable `HIVE_CUSTOM_CONF_DIR` for the command. For instance, put the custom configuration file under the directory `/opt/hive/conf`, then run,
106+
107+
```shell
108+
docker run -d -p 9083:9083 --env DB_DRIVER=postgres \
109+
-v /opt/hive/conf:/hive_custom_conf --env HIVE_CUSTOM_CONF_DIR=/hive_custom_conf \
110+
--mount type=bind,source=`mvn help:evaluate -Dexpression=settings.localRepository -q -DforceStdout`/org/postgresql/postgresql/42.7.3/postgresql-42.7.3.jar,target=/opt/hive/lib/postgres.jar \
111+
    --name metastore apache/hive-metastore:${HIVE_VERSION}
112+
```
113+
114+
NOTE:
115+
116+
For Hive releases before 4.0, if you want to upgrade the existing external Metastore schema to the target version,
117+
then add "--env SCHEMA_COMMAND=upgradeSchema" to the command.
Lines changed: 114 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,114 @@
1+
#!/bin/bash
2+
3+
#
4+
# Licensed to the Apache Software Foundation (ASF) under one or more
5+
# contributor license agreements. See the NOTICE file distributed with
6+
# this work for additional information regarding copyright ownership.
7+
# The ASF licenses this file to You under the Apache License, Version 2.0
8+
# (the "License"); you may not use this file except in compliance with
9+
# the License. You may obtain a copy of the License at
10+
#
11+
# http://www.apache.org/licenses/LICENSE-2.0
12+
#
13+
# Unless required by applicable law or agreed to in writing, software
14+
# distributed under the License is distributed on an "AS IS" BASIS,
15+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16+
# See the License for the specific language governing permissions and
17+
# limitations under the License.
18+
#
19+
set -eux
20+
HIVE_VERSION=
21+
HADOOP_VERSION=
22+
usage() {
23+
cat <<EOF 1>&2
24+
Usage: $0 [-h] [-hadoop <Hadoop version>] [-hive <Hive version>] [-repo <Docker repo>]
25+
Build the Hive Docker image
26+
-help Display help
27+
-hadoop Build image with the specified Hadoop version
28+
-hive Build image with the specified Hive version
29+
-repo Docker repository
30+
EOF
31+
}
32+
33+
while [ $# -gt 0 ]; do
34+
case "$1" in
35+
-h)
36+
usage
37+
exit 0
38+
;;
39+
-hadoop)
40+
shift
41+
HADOOP_VERSION=$1
42+
shift
43+
;;
44+
-hive)
45+
shift
46+
HIVE_VERSION=$1
47+
shift
48+
;;
49+
-repo)
50+
shift
51+
REPO=$1
52+
shift
53+
;;
54+
*)
55+
shift
56+
;;
57+
esac
58+
done
59+
60+
SCRIPT_DIR=$(cd $(dirname $0); pwd)
61+
SOURCE_DIR=${SOURCE_DIR:-"$SCRIPT_DIR/../../.."}
62+
repo=${REPO:-apache}
63+
WORK_DIR="$(mktemp -d)"
64+
CACHE_DIR="$SCRIPT_DIR/../../cache"
65+
mkdir -p "$CACHE_DIR"
66+
HADOOP_VERSION=${HADOOP_VERSION:-$(mvn -f "$SOURCE_DIR/pom.xml" -q help:evaluate -Dexpression=hadoop.version -DforceStdout)}
67+
68+
HADOOP_FILE_NAME="hadoop-$HADOOP_VERSION.tar.gz"
69+
HADOOP_URL=${HADOOP_URL:-"https://archive.apache.org/dist/hadoop/core/hadoop-$HADOOP_VERSION/$HADOOP_FILE_NAME"}
70+
if [ ! -f "$CACHE_DIR/$HADOOP_FILE_NAME" ]; then
71+
echo "Downloading Hadoop from $HADOOP_URL..."
72+
if ! curl --fail -L "$HADOOP_URL" -o "$CACHE_DIR/$HADOOP_FILE_NAME.tmp"; then
73+
echo "Fail to download Hadoop, exiting...."
74+
exit 1
75+
fi
76+
mv "$CACHE_DIR/$HADOOP_FILE_NAME.tmp" "$CACHE_DIR/$HADOOP_FILE_NAME"
77+
fi
78+
79+
if [ -n "$HIVE_VERSION" ]; then
80+
HIVE_FILE_NAME="apache-hive-standalone-metastore-server-$HIVE_VERSION-bin.tar.gz"
81+
if [ ! -f "$CACHE_DIR/$HIVE_FILE_NAME" ]; then
82+
HIVE_URL=${HIVE_URL:-"https://archive.apache.org/dist/hive/hive-standalone-metastore-server-$HIVE_VERSION/$HIVE_FILE_NAME"}
83+
echo "Downloading Hive Metastore from $HIVE_URL..."
84+
if ! curl --fail -L "$HIVE_URL" -o "$CACHE_DIR/$HIVE_FILE_NAME.tmp"; then
85+
echo "Failed to download Hive Metastore, exiting..."
86+
exit 1
87+
fi
88+
mv "$CACHE_DIR/$HIVE_FILE_NAME.tmp" "$CACHE_DIR/$HIVE_FILE_NAME"
89+
fi
90+
cp "$CACHE_DIR/$HIVE_FILE_NAME" "$WORK_DIR"
91+
else
92+
HIVE_VERSION=$(mvn -f "$SOURCE_DIR/pom.xml" -q help:evaluate -Dexpression=project.version -DforceStdout)
93+
HIVE_TAR="$SOURCE_DIR/metastore-server/target/apache-hive-standalone-metastore-server-$HIVE_VERSION-bin.tar.gz"
94+
if ls "$HIVE_TAR" || mvn -f "$SOURCE_DIR/pom.xml" clean package -DskipTests; then
95+
cp "$HIVE_TAR" "$WORK_DIR/"
96+
else
97+
echo "Failed to compile Hive Metastore project, exiting..."
98+
exit 1
99+
fi
100+
fi
101+
102+
cp "$CACHE_DIR/hadoop-$HADOOP_VERSION.tar.gz" "$WORK_DIR/"
103+
cp -R "$SOURCE_DIR/metastore-server/src/docker/conf" "$WORK_DIR/"
104+
cp -R "$SOURCE_DIR/metastore-server/src/docker/entrypoint.sh" "$WORK_DIR/"
105+
cp "$SOURCE_DIR/metastore-server/src/docker/Dockerfile" "$WORK_DIR/"
106+
docker build \
107+
"$WORK_DIR" \
108+
-f "$WORK_DIR/Dockerfile" \
109+
-t "$repo/hive-metastore:$HIVE_VERSION" \
110+
--build-arg "BUILD_ENV=unarchive" \
111+
--build-arg "HIVE_VERSION=$HIVE_VERSION" \
112+
--build-arg "HADOOP_VERSION=$HADOOP_VERSION" \
113+
114+
rm -r "${WORK_DIR}"

0 commit comments

Comments
 (0)