From 5b73abd0b74014d26b8f6bd1ac11fe6c262a82fd Mon Sep 17 00:00:00 2001 From: Qi Yu Date: Mon, 23 Sep 2024 10:07:56 +0800 Subject: [PATCH] [#4979] improvement(docker-hive): Add S3 related configuration to support Hive S3 schema/table. (#4980) ### What changes were proposed in this pull request? Add S3-related configuration to `hive-site.xml`. ### Why are the changes needed? To support creating tables with an S3 location. Fix: #4979 ### Does this PR introduce _any_ user-facing change? N/A ### How was this patch tested? Tested locally. --- dev/docker/hive/hive-site.xml | 21 +++++++++++++++++++++ dev/docker/hive/start.sh | 6 ++++++ docs/docker-image-details.md | 6 ++++++ 3 files changed, 33 insertions(+) diff --git a/dev/docker/hive/hive-site.xml b/dev/docker/hive/hive-site.xml index 3346d6be61f..477187153cb 100644 --- a/dev/docker/hive/hive-site.xml +++ b/dev/docker/hive/hive-site.xml @@ -42,4 +42,25 @@ hdfs://__REPLACE__HOST_NAME:9000/user/hive/warehouse location of default database for the warehouse + + + fs.s3a.access.key + S3_ACCESS_KEY_ID + + + + fs.s3a.secret.key + S3_SECRET_KEY_ID + + + + fs.s3a.endpoint + S3_ENDPOINT_ID + + + + fs.s3a.aws.credentials.provider + org.apache.hadoop.fs.s3a.SimpleAWSCredentialsProvider,com.amazonaws.auth.EnvironmentVariableCredentialsProvider,org.apache.hadoop.fs.s3a.AnonymousAWSCredentialsProvider + + diff --git a/dev/docker/hive/start.sh b/dev/docker/hive/start.sh index 8bf1f12b97f..b9c545a0a7f 100644 --- a/dev/docker/hive/start.sh +++ b/dev/docker/hive/start.sh @@ -27,6 +27,8 @@ else ln -s ${HADOOP2_HOME} ${HADOOP_HOME} fi + cp ${HADOOP_HOME}/share/hadoop/tools/lib/*aws* ${HIVE_HOME}/lib + # Copy Hadoop and Hive configuration file and update hostname cp -f ${HADOOP_TMP_CONF_DIR}/* ${HADOOP_CONF_DIR} cp -f ${HIVE_TMP_CONF_DIR}/* ${HIVE_CONF_DIR} @@ -34,6 +36,10 @@ sed -i "s/__REPLACE__HOST_NAME/$(hostname)/g" ${HADOOP_CONF_DIR}/core-site.xml sed -i "s/__REPLACE__HOST_NAME/$(hostname)/g" ${HADOOP_CONF_DIR}/hdfs-site.xml sed -i 
"s/__REPLACE__HOST_NAME/$(hostname)/g" ${HIVE_CONF_DIR}/hive-site.xml +sed -i "s|S3_ACCESS_KEY_ID|${S3_ACCESS_KEY}|g" ${HIVE_CONF_DIR}/hive-site.xml +sed -i "s|S3_SECRET_KEY_ID|${S3_SECRET_KEY}|g" ${HIVE_CONF_DIR}/hive-site.xml +sed -i "s|S3_ENDPOINT_ID|${S3_ENDPOINT}|g" ${HIVE_CONF_DIR}/hive-site.xml + # Link mysql-connector-java after deciding where HIVE_HOME symbolic link points to. ln -s /opt/mysql-connector-java-${MYSQL_JDBC_DRIVER_VERSION}/mysql-connector-java-${MYSQL_JDBC_DRIVER_VERSION}.jar ${HIVE_HOME}/lib diff --git a/docs/docker-image-details.md b/docs/docker-image-details.md index 629541ea026..ba061ccd988 100644 --- a/docs/docker-image-details.md +++ b/docs/docker-image-details.md @@ -147,6 +147,12 @@ You can use this kind of image to test the catalog of Apache Hive. Changelog +- apache/gravitino-ci:hive-0.1.14 + - Add amazon S3 related configurations in the `hive-site.xml` file. + - `fs.s3a.access.key` The access key for the S3 bucket. + - `fs.s3a.secret.key` The secret key for the S3 bucket. + - `fs.s3a.endpoint` The endpoint for the S3 bucket. + - apache/gravitino-ci:hive-0.1.13 (Switch to Apache official DockerHub repository) - Use Gravitino release 0.6.0 Dockerfile to build the image.