Skip to content

Commit

Permalink
[docker](hive) add hive3 docker compose and modify scripts (#33115)
Browse files Browse the repository at this point in the history
add hive3 docker compose from:
big-data-europe/docker-hive#56
  • Loading branch information
suxiaogang223 authored Apr 16, 2024
1 parent c7c8916 commit c3f126b
Show file tree
Hide file tree
Showing 20 changed files with 304 additions and 121 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

# Hive settings passed to the Hive containers as environment variables.
# NOTE(review): presumably the image entrypoint turns each HIVE_SITE_CONF_* variable
# into a hive-site.xml property -- confirm against the image in use.

# JDBC connection to the metastore database; the host is the compose service
# `hive-metastore-postgresql`.
HIVE_SITE_CONF_javax_jdo_option_ConnectionURL=jdbc:postgresql://hive-metastore-postgresql/metastore
HIVE_SITE_CONF_javax_jdo_option_ConnectionDriverName=org.postgresql.Driver
HIVE_SITE_CONF_javax_jdo_option_ConnectionUserName=hive
HIVE_SITE_CONF_javax_jdo_option_ConnectionPassword=hive
HIVE_SITE_CONF_datanucleus_autoCreateSchema=false
# Thrift endpoint of the metastore: compose service `hive-metastore`, container port 9083.
HIVE_SITE_CONF_hive_metastore_uris=thrift://hive-metastore:9083
# HiveServer2 binds to all interfaces on port 10000 (the container port published as ${HS_PORT}).
HIVE_SITE_CONF_hive_server2_thrift_bind_host=0.0.0.0
HIVE_SITE_CONF_hive_server2_thrift_port=10000
HIVE_SITE_CONF_metastore_storage_schema_reader_impl=org.apache.hadoop.hive.metastore.SerDeStorageSchemaReader

# Hadoop core settings.
# ${IP_HOST} and ${FS_PORT} are template placeholders; FS_PORT is the host-side port
# that the compose files map to the namenode's container port 8020.
# NOTE(review): IP_HOST is not defined in the settings files shown with this change --
# confirm which script supplies it when the template is rendered.
CORE_CONF_fs_defaultFS=hdfs://${IP_HOST}:${FS_PORT}
CORE_CONF_hadoop_http_staticuser_user=root
CORE_CONF_hadoop_proxyuser_hue_hosts=*
CORE_CONF_hadoop_proxyuser_hue_groups=*

# HDFS settings for the test cluster: WebHDFS on, permission checks off,
# and the namenode's datanode-registration ip/hostname check off.
# In these keys a triple underscore stands for a hyphen in the property name
# (ip___hostname___check -> ip-hostname-check).
HDFS_CONF_dfs_webhdfs_enabled=true
HDFS_CONF_dfs_permissions_enabled=false
HDFS_CONF_dfs_namenode_datanode_registration_ip___hostname___check=false

# YARN settings.
# Naming convention used throughout this file: a single underscore separates
# property-name segments and a triple underscore stands for a hyphen
# (e.g. yarn_log___aggregation___enable -> yarn.log-aggregation-enable).
YARN_CONF_yarn_log___aggregation___enable=true
YARN_CONF_yarn_resourcemanager_recovery_enabled=true
YARN_CONF_yarn_resourcemanager_store_class=org.apache.hadoop.yarn.server.resourcemanager.recovery.FileSystemRMStateStore
YARN_CONF_yarn_resourcemanager_fs_state___store_uri=/rmstate
YARN_CONF_yarn_nodemanager_remote___app___log___dir=/app-logs
YARN_CONF_yarn_log_server_url=http://historyserver:8188/applicationhistory/logs/
YARN_CONF_yarn_timeline___service_enabled=true
YARN_CONF_yarn_timeline___service_generic___application___history_enabled=true
YARN_CONF_yarn_resourcemanager_system___metrics___publisher_enabled=true
YARN_CONF_yarn_resourcemanager_hostname=resourcemanager
YARN_CONF_yarn_timeline___service_hostname=historyserver
YARN_CONF_yarn_resourcemanager_address=resourcemanager:8032
YARN_CONF_yarn_resourcemanager_scheduler_address=resourcemanager:8030
# The Hadoop property is yarn.resourcemanager.resource-tracker.address, so the hyphen
# needs the triple underscore; this line previously used a double underscore.
YARN_CONF_yarn_resourcemanager_resource___tracker_address=resourcemanager:8031
Original file line number Diff line number Diff line change
Expand Up @@ -15,25 +15,24 @@
# limitations under the License.
#

HIVE_SITE_CONF_javax_jdo_option_ConnectionURL=jdbc:postgresql://externalEnvIp:5432/metastore
HIVE_SITE_CONF_javax_jdo_option_ConnectionURL=jdbc:postgresql://hive-metastore-postgresql/metastore
HIVE_SITE_CONF_javax_jdo_option_ConnectionDriverName=org.postgresql.Driver
HIVE_SITE_CONF_javax_jdo_option_ConnectionUserName=hive
HIVE_SITE_CONF_javax_jdo_option_ConnectionPassword=hive
HIVE_SITE_CONF_datanucleus_autoCreateSchema=false
HIVE_SITE_CONF_hive_metastore_uris=thrift://externalEnvIp:9083
HDFS_CONF_dfs_namenode_datanode_registration_ip___hostname___check=false
HIVE_SITE_CONF_hive_metastore_uris=thrift://hive-metastore:9083
HIVE_SITE_CONF_hive_server2_thrift_bind_host=0.0.0.0
HIVE_SITE_CONF_hive_server2_thrift_port=10000
HIVE_SITE_CONF_hive_compactor_initiator_on=true
HIVE_SITE_CONF_hive_compactor_worker_threads=2
HIVE_SITE_CONF_metastore_storage_schema_reader_impl=org.apache.hadoop.hive.metastore.SerDeStorageSchemaReader

CORE_CONF_fs_defaultFS=hdfs://namenode:8020
CORE_CONF_hadoop_http_staticuser_user=root
CORE_CONF_hadoop_proxyuser_hue_hosts=*
CORE_CONF_hadoop_proxyuser_hue_groups=*
CORE_CONF_hadoop_proxyuser_hive_hosts=*

HDFS_CONF_dfs_webhdfs_enabled=true
HDFS_CONF_dfs_permissions_enabled=false
HDFS_CONF_dfs_namenode_datanode_registration_ip___hostname___check=false

YARN_CONF_yarn_log___aggregation___enable=true
YARN_CONF_yarn_resourcemanager_recovery_enabled=true
Expand Down
70 changes: 33 additions & 37 deletions docker/thirdparties/docker-compose/hive/hive-2x.yaml.tpl
Original file line number Diff line number Diff line change
Expand Up @@ -19,87 +19,83 @@
version: "3.8"

services:
doris--namenode:
namenode:
image: bde2020/hadoop-namenode:2.0.0-hadoop2.7.4-java8
environment:
- CLUSTER_NAME=test
env_file:
- ./hadoop-hive.env
container_name: doris--namenode
expose:
- "50070"
- "8020"
- "9000"
container_name: ${CONTAINER_UID}hadoop2-namenode
ports:
- "${FS_PORT}:8020"
healthcheck:
test: [ "CMD", "curl", "http://localhost:50070/" ]
interval: 5s
timeout: 120s
retries: 120
network_mode: "host"

doris--datanode:
datanode:
image: bde2020/hadoop-datanode:2.0.0-hadoop2.7.4-java8
env_file:
- ./hadoop-hive.env
environment:
SERVICE_PRECONDITION: "externalEnvIp:50070"
container_name: doris--datanode
expose:
- "50075"
SERVICE_PRECONDITION: "namenode:50070"
container_name: ${CONTAINER_UID}hadoop2-datanode
healthcheck:
test: [ "CMD", "curl", "http://localhost:50075" ]
interval: 5s
timeout: 60s
retries: 120
network_mode: "host"

doris--hive-server:
hive-server:
image: bde2020/hive:2.3.2-postgresql-metastore
env_file:
- ./hadoop-hive.env
- ./hadoop-hive-metastore.env
environment:
HIVE_CORE_CONF_javax_jdo_option_ConnectionURL: "jdbc:postgresql://externalEnvIp:5432/metastore"
SERVICE_PRECONDITION: "externalEnvIp:9083"
container_name: doris--hive-server
expose:
- "10000"
HIVE_CORE_CONF_javax_jdo_option_ConnectionURL: "jdbc:postgresql://hive-metastore/metastore"
SERVICE_PRECONDITION: "hive-metastore:9083"
container_name: ${CONTAINER_UID}hive2-server
ports:
- "${HS_PORT}:10000"
depends_on:
- doris--datanode
- doris--namenode
- datanode
- namenode
healthcheck:
test: beeline -u "jdbc:hive2://127.0.0.1:10000/default" -n health_check -e "show databases;"
interval: 10s
timeout: 120s
retries: 120
network_mode: "host"


doris--hive-metastore:
hive-metastore:
image: bde2020/hive:2.3.2-postgresql-metastore
env_file:
- ./hadoop-hive.env
- ./hadoop-hive-metastore.env
command: /bin/bash /mnt/scripts/hive-metastore.sh
# command: /opt/hive/bin/hive --service metastore
environment:
SERVICE_PRECONDITION: "externalEnvIp:50070 externalEnvIp:50075 externalEnvIp:5432"
container_name: doris--hive-metastore
expose:
- "9083"
SERVICE_PRECONDITION: "namenode:50070 datanode:50075 hive-metastore-postgresql:5432"
container_name: ${CONTAINER_UID}hive2-metastore
ports:
- "${HMS_PORT}:9083"
volumes:
- ./scripts:/mnt/scripts
depends_on:
- doris--hive-metastore-postgresql
network_mode: "host"
- hive-metastore-postgresql

doris--hive-metastore-postgresql:
hive-metastore-postgresql:
image: bde2020/hive-metastore-postgresql:2.3.0
restart: always
container_name: doris--hive-metastore-postgresql
expose:
- "5432"
container_name: ${CONTAINER_UID}hive2-metastore-postgresql
ports:
- "${PG_PORT}:5432"
healthcheck:
test: ["CMD-SHELL", "pg_isready -U postgres"]
interval: 5s
timeout: 60s
retries: 120
network_mode: "host"

# solve HiveServer2 connect error:
# java.net.URISyntaxException Illegal character in hostname :thrift://${CONTAINER_UID}hive2_default:9083
networks:
default:
name: ${CONTAINER_UID}hive2-default
29 changes: 7 additions & 22 deletions ...arties/docker-compose/hive/gen_env.sh.tpl → .../docker-compose/hive/hive-2x_settings.env
100755 → 100644
Original file line number Diff line number Diff line change
Expand Up @@ -16,26 +16,11 @@
# specific language governing permissions and limitations
# under the License.

####################################################################
# This script will generate hadoop-hive.env from hadoop-hive.env.tpl
####################################################################
# Change this to a specific string.
# Do not use "_" or other special characters; use only digits and letters.
# NOTICE: changing this uid will modify hive-*.yaml

set -eo pipefail

ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")" &>/dev/null && pwd)"
FS_PORT=8020
HMS_PORT=9083

cp "${ROOT}"/hadoop-hive.env.tpl "${ROOT}"/hadoop-hive.env
# Need to set hostname of container to same as host machine's.
# Otherwise, the doris process can not connect to namenode directly.
HOST_NAME="doris--"

{
echo "FS_PORT=${FS_PORT}"
echo "HMS_PORT=${HMS_PORT}"
echo "CORE_CONF_fs_defaultFS=hdfs://${externalEnvIp}:${FS_PORT}"
echo "HOST_NAME=${HOST_NAME}"
echo "externalEnvIp=${externalEnvIp}"

} >>"${ROOT}"/hadoop-hive.env
# Host-side ports used in the hive-2x compose port mappings ("${VAR}:<container port>").
# Each value must match the corresponding entry in regression-conf.groovy.
export FS_PORT=8220   # namenode, container port 8020; keep in sync with regression-conf.groovy
export HMS_PORT=9283  # hive-metastore, container port 9083; keep in sync with regression-conf.groovy
export HS_PORT=12000  # hive-server, container port 10000; keep in sync with regression-conf.groovy
export PG_PORT=5632   # hive-metastore-postgresql, container port 5432; keep in sync with regression-conf.groovy
102 changes: 102 additions & 0 deletions docker/thirdparties/docker-compose/hive/hive-3x.yaml.tpl
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#


# Docker Compose template for a Hive 3 test stack: HDFS namenode + datanode,
# HiveServer2, Hive metastore and the metastore's PostgreSQL database.
# ${CONTAINER_UID}, ${FS_PORT}, ${HS_PORT}, ${HMS_PORT} and ${PG_PORT} are
# placeholders that must be provided when this template is rendered/run.
version: "3.8"

services:
  # HDFS NameNode; container port 8020 is published on the host as ${FS_PORT}.
  namenode:
    image: bde2020/hadoop-namenode:2.0.0-hadoop3.2.1-java8
    environment:
      - CLUSTER_NAME=test
    env_file:
      - ./hadoop-hive.env
    container_name: ${CONTAINER_UID}hadoop3-namenode
    ports:
      - "${FS_PORT}:8020"
    healthcheck:
      # -f makes curl exit non-zero on HTTP error responses (>= 400); without it
      # any HTTP reply, including an error page, would count as healthy.
      test: ["CMD", "curl", "-f", "http://localhost:9870/"]
      interval: 5s
      timeout: 120s
      retries: 120

  # HDFS DataNode; SERVICE_PRECONDITION names the namenode web port it waits for.
  datanode:
    image: bde2020/hadoop-datanode:2.0.0-hadoop3.2.1-java8
    env_file:
      - ./hadoop-hive.env
    environment:
      SERVICE_PRECONDITION: "namenode:9870"
    container_name: ${CONTAINER_UID}hadoop3-datanode
    healthcheck:
      # -f: fail the check on HTTP error responses, see namenode above.
      test: ["CMD", "curl", "-f", "http://localhost:9864"]
      interval: 5s
      timeout: 60s
      retries: 120

  # HiveServer2; container port 10000 is published on the host as ${HS_PORT}.
  hive-server:
    image: lishizhen/hive:3.1.2-postgresql-metastore
    env_file:
      - ./hadoop-hive-metastore.env
    environment:
      # NOTE(review): this URL names host `hive-metastore`, while the PostgreSQL
      # service in this file is `hive-metastore-postgresql`. Confirm whether the
      # image reads HIVE_CORE_CONF_* at all; value kept unchanged.
      HIVE_CORE_CONF_javax_jdo_option_ConnectionURL: "jdbc:postgresql://hive-metastore/metastore"
      SERVICE_PRECONDITION: "hive-metastore:9083"
    container_name: ${CONTAINER_UID}hive3-server
    ports:
      - "${HS_PORT}:10000"
    depends_on:
      - datanode
      - namenode
    healthcheck:
      # Healthy once a beeline session can list databases through HiveServer2.
      test: beeline -u "jdbc:hive2://127.0.0.1:10000/default" -n health_check -e "show databases;"
      interval: 10s
      timeout: 120s
      retries: 120

  # Hive metastore; container port 9083 is published on the host as ${HMS_PORT}.
  # Started through the mounted ./scripts/hive-metastore.sh instead of the plain
  # `hive --service metastore` command kept below for reference.
  hive-metastore:
    image: lishizhen/hive:3.1.2-postgresql-metastore
    env_file:
      - ./hadoop-hive-metastore.env
    command: /bin/bash /mnt/scripts/hive-metastore.sh
    # command: /opt/hive/bin/hive --service metastore
    environment:
      SERVICE_PRECONDITION: "namenode:9870 datanode:9864 hive-metastore-postgresql:5432"
    container_name: ${CONTAINER_UID}hive3-metastore
    ports:
      - "${HMS_PORT}:9083"
    volumes:
      - ./scripts:/mnt/scripts
    depends_on:
      - hive-metastore-postgresql

  # PostgreSQL database backing the metastore; container port 5432 is published
  # on the host as ${PG_PORT}.
  hive-metastore-postgresql:
    image: bde2020/hive-metastore-postgresql:3.1.0
    container_name: ${CONTAINER_UID}hive3-metastore-postgresql
    ports:
      - "${PG_PORT}:5432"
    healthcheck:
      test: ["CMD-SHELL", "pg_isready -U postgres"]
      interval: 5s
      timeout: 60s
      retries: 120

# The default network is given an explicit, hyphenated name to solve this
# HiveServer2 connect error (note the underscore in the hostname):
#   java.net.URISyntaxException Illegal character in hostname :thrift://${CONTAINER_UID}hive3_default:9083
networks:
  default:
    name: ${CONTAINER_UID}hive3-default
26 changes: 26 additions & 0 deletions docker/thirdparties/docker-compose/hive/hive-3x_settings.env
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
#!/bin/bash
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

# Change this to a specific string.
# Do not use "_" or other special characters; use only digits and letters.
# NOTICE: changing this uid will modify hive-*.yaml
# NOTE(review): no uid variable is defined in this file; the three lines above
# appear to describe the CONTAINER_UID used by hive-*.yaml.tpl -- confirm where it is set.

# Host-side ports used in the hive-3x compose port mappings ("${VAR}:<container port>").
# Each value must match the corresponding entry in regression-conf.groovy.
export FS_PORT=8020   # namenode, container port 8020; keep in sync with regression-conf.groovy
export HMS_PORT=9083  # hive-metastore, container port 9083; keep in sync with regression-conf.groovy
export HS_PORT=10000  # hive-server, container port 10000; keep in sync with regression-conf.groovy
export PG_PORT=5432   # hive-metastore-postgresql, container port 5432; keep in sync with regression-conf.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -601,6 +601,8 @@ CREATE TABLE `unsupported_type_table`(
k6 int
);

set hive.stats.column.autogather=false;

CREATE TABLE `schema_evo_test_text`(
id int,
name string
Expand Down Expand Up @@ -628,6 +630,8 @@ insert into `schema_evo_test_orc` select 1, "kaka";
alter table `schema_evo_test_orc` ADD COLUMNS (`ts` timestamp);
insert into `schema_evo_test_orc` select 2, "messi", from_unixtime(to_unix_timestamp('20230101 13:01:03','yyyyMMdd HH:mm:ss'));

set hive.stats.column.autogather=true;

-- Currently docker is hive 2.x version. Hive 2.x versioned full-acid tables need to run major compaction.
SET hive.support.concurrency=true;
SET hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;
Expand Down
Loading

0 comments on commit c3f126b

Please sign in to comment.