diff --git a/crates/catalog/hms/testdata/hms_catalog/Dockerfile b/crates/catalog/hms/testdata/hms_catalog/Dockerfile index 7c1f86266..ff8c9fae6 100644 --- a/crates/catalog/hms/testdata/hms_catalog/Dockerfile +++ b/crates/catalog/hms/testdata/hms_catalog/Dockerfile @@ -1,53 +1,34 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at # -# http://www.apache.org/licenses/LICENSE-2.0 +# http://www.apache.org/licenses/LICENSE-2.0 # -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. -FROM openjdk:8u342-jre +FROM openjdk:8-jre-slim AS build -ENV HADOOP_VERSION=3.3.5 -ENV HADOOP_HOME=/opt/hadoop-${HADOOP_VERSION} -ENV PATH=$PATH:$HADOOP_HOME/bin:$HADOOP_HOME/sbin +RUN apt-get update -qq && apt-get -qq -y install curl -ENV HIVE_VERSION=3.1.3 -ENV HIVE_HOME=/opt/apache-hive-${HIVE_VERSION}-bin -ENV PATH=$HIVE_HOME/bin:$PATH +ENV AWSSDK_VERSION=2.20.18 +ENV HADOOP_VERSION=3.1.0 -# Set classpath for S3 Access -ENV HADOOP_CLASSPATH=${HADOOP_HOME}/share/hadoop/tools/lib/aws-java-sdk-bundle-1.12.316.jar:${HADOOP_HOME}/share/hadoop/tools/lib/hadoop-aws-3.3.5.jar +RUN curl https://repo1.maven.org/maven2/com/amazonaws/aws-java-sdk-bundle/1.11.271/aws-java-sdk-bundle-1.11.271.jar -Lo /tmp/aws-java-sdk-bundle-1.11.271.jar +RUN curl https://repo1.maven.org/maven2/org/apache/hadoop/hadoop-aws/${HADOOP_VERSION}/hadoop-aws-${HADOOP_VERSION}.jar -Lo /tmp/hadoop-aws-${HADOOP_VERSION}.jar -WORKDIR /opt -RUN apt-get update && apt-get install -y procps fastjar +FROM apache/hive:3.1.3 -RUN wget https://downloads.apache.org/hadoop/common/hadoop-${HADOOP_VERSION}/hadoop-${HADOOP_VERSION}.tar.gz && \ - tar -xzf hadoop-${HADOOP_VERSION}.tar.gz && \ - rm hadoop-${HADOOP_VERSION}.tar.gz +ENV AWSSDK_VERSION=2.20.18 +ENV HADOOP_VERSION=3.1.0 -RUN wget https://downloads.apache.org/hive/hive-${HIVE_VERSION}/apache-hive-${HIVE_VERSION}-bin.tar.gz && \ - tar -xzf apache-hive-${HIVE_VERSION}-bin.tar.gz && \ - rm apache-hive-${HIVE_VERSION}-bin.tar.gz - -RUN cd ${HIVE_HOME}/lib && \ - wget https://repo1.maven.org/maven2/mysql/mysql-connector-java/8.0.28/mysql-connector-java-8.0.28.jar - -COPY ./hive-site.xml ${HIVE_HOME}/conf/hive-site.xml -COPY ./entrypoint.sh /entrypoint.sh - -RUN chmod +x /entrypoint.sh - -EXPOSE 9083 - -ENTRYPOINT ["sh", "-c", "/entrypoint.sh"] \ No newline at end of file +COPY --from=build /tmp/hadoop-aws-${HADOOP_VERSION}.jar /opt/hive/lib/hadoop-aws-${HADOOP_VERSION}.jar +COPY --from=build /tmp/aws-java-sdk-bundle-1.11.271.jar /opt/hive/lib/aws-java-sdk-bundle-1.11.271.jar +COPY core-site.xml /opt/hadoop/etc/hadoop/core-site.xml \ No newline at end of file diff --git a/crates/catalog/hms/testdata/hms_catalog/core-site.xml b/crates/catalog/hms/testdata/hms_catalog/core-site.xml new file mode 100644 index 000000000..53789f0f0 --- /dev/null +++ b/crates/catalog/hms/testdata/hms_catalog/core-site.xml @@ -0,0 +1,53 @@ + + + + + + + fs.defaultFS + s3a://warehouse/hive + + + fs.s3a.impl + org.apache.hadoop.fs.s3a.S3AFileSystem + + + fs.s3a.fast.upload + true + + + fs.s3a.endpoint + http://minio:9000 + + + fs.s3a.access.key + admin + + + fs.s3a.secret.key + password + + + fs.s3a.connection.ssl.enabled + false + + + fs.s3a.path.style.access + true + + \ No newline at end of file diff --git a/crates/catalog/hms/testdata/hms_catalog/docker-compose.yaml b/crates/catalog/hms/testdata/hms_catalog/docker-compose.yaml index 85413a8ab..c9605868b 100644 --- a/crates/catalog/hms/testdata/hms_catalog/docker-compose.yaml +++ b/crates/catalog/hms/testdata/hms_catalog/docker-compose.yaml @@ -29,20 +29,22 @@ services: - MINIO_DOMAIN=minio command: [ "server", "/data", "--console-address", ":9001" ] - hive-mysql: - image: mysql:5.7 - expose: - - 3306 + mc: + depends_on: + - minio + image: minio/mc:RELEASE.2024-03-07T00-31-49Z environment: - - MYSQL_ROOT_PASSWORD=admin - - MYSQL_DATABASE=metastore - - MYSQL_USER=hive - - MYSQL_PASSWORD=hive + - AWS_ACCESS_KEY_ID=admin + - AWS_SECRET_ACCESS_KEY=password + - AWS_REGION=us-east-1 + entrypoint: > + /bin/sh -c " until (/usr/bin/mc config host add minio http://minio:9000 admin password) do echo '...waiting...' && sleep 1; done; /usr/bin/mc mb minio/warehouse; /usr/bin/mc policy set public minio/warehouse; tail -f /dev/null " hive-metastore: - image: iceberg-hms + image: iceberg-hive-metastore build: ./ - depends_on: - - hive-mysql expose: - 9083 + environment: + SERVICE_NAME: "metastore" + SERVICE_OPTS: "-Dmetastore.warehouse.dir=s3a://warehouse/hive/" diff --git a/crates/catalog/hms/testdata/hms_catalog/entrypoint.sh b/crates/catalog/hms/testdata/hms_catalog/entrypoint.sh deleted file mode 100755 index f73863781..000000000 --- a/crates/catalog/hms/testdata/hms_catalog/entrypoint.sh +++ /dev/null @@ -1,32 +0,0 @@ -#!/bin/sh - -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -HIVE_VERSION=3.1.3 -HIVE_HOME=/opt/apache-hive-${HIVE_VERSION}-bin - -# Check if schema exists -${HIVE_HOME}/bin/schematool -dbType mysql -info - -if [ $? -eq 1 ]; then - echo "Getting schema info failed. Probably not initialized. Initializing...in 5s" - sleep 5 - ${HIVE_HOME}/bin/schematool -initSchema -dbType mysql -fi - -${HIVE_HOME}/bin/hive --service metastore diff --git a/crates/catalog/hms/testdata/hms_catalog/hive-site.xml b/crates/catalog/hms/testdata/hms_catalog/hive-site.xml deleted file mode 100644 index c2df65cdd..000000000 --- a/crates/catalog/hms/testdata/hms_catalog/hive-site.xml +++ /dev/null @@ -1,70 +0,0 @@ - - - - - metastore.thrift.uris - thrift://localhost:9083 - Thrift URI for the remote metastore. Used by metastore client to connect to remote metastore. - - - metastore.task.threads.always - org.apache.hadoop.hive.metastore.events.EventCleanerTask - - - metastore.expression.proxy - org.apache.hadoop.hive.metastore.DefaultPartitionExpressionProxy - - - javax.jdo.option.ConnectionDriverName - com.mysql.cj.jdbc.Driver - - - javax.jdo.option.ConnectionURL - jdbc:mysql://hive-mysql:3306/metastore - - - javax.jdo.option.ConnectionUserName - hive - - - javax.jdo.option.ConnectionPassword - hive - - - fs.s3a.impl - org.apache.hadoop.fs.s3a.S3AFileSystem - - - fs.s3a.access.key - admin - - - fs.s3a.secret.key - password - - - fs.s3a.endpoint - http://minio:9000 - - - fs.s3a.path.style.access - true - - diff --git a/crates/catalog/hms/tests/hms_catalog_test.rs b/crates/catalog/hms/tests/hms_catalog_test.rs index 5628c094a..bab83a955 100644 --- a/crates/catalog/hms/tests/hms_catalog_test.rs +++ b/crates/catalog/hms/tests/hms_catalog_test.rs @@ -122,10 +122,7 @@ async fn test_get_namespace() -> Result<()> { let ns = Namespace::new(NamespaceIdent::new("default".into())); let properties = HashMap::from([ - ( - "location".to_string(), - "file:/user/hive/warehouse".to_string(), - ), + ("location".to_string(), "s3a://warehouse/hive".to_string()), ( "hive.metastore.database.owner-type".to_string(), "Role".to_string(),