Skip to content

Commit

Permalink
[#4528] improvement(hive-catalog): reduce hive catalog libs size from…
Browse files Browse the repository at this point in the history
… 146MB to 43MB (#4531)

### What changes were proposed in this pull request?

Remove some unnecessary dependencies from the Hive catalog to shrink its bundled libs.

### Why are the changes needed?

Fix: #4528 

### Does this PR introduce _any_ user-facing change?

no

### How was this patch tested?

Existing CI passed with the reduced dependency set.
  • Loading branch information
mchades authored Aug 30, 2024
1 parent f4143ab commit c01edab
Show file tree
Hide file tree
Showing 6 changed files with 69 additions and 27 deletions.
3 changes: 3 additions & 0 deletions LICENSE.bin
Original file line number Diff line number Diff line change
Expand Up @@ -358,6 +358,8 @@
WildFly
Confluent Kafka Streams Examples
Apache Arrow
Rome
Jettison

This product bundles various third-party components also under the
Apache Software Foundation License 1.1
Expand Down Expand Up @@ -404,6 +406,7 @@
Common Development and Distribution License 1.0

Javax Activation
Javax Mail
Stax API
Java Servlet API
JSR311 API
Expand Down
16 changes: 11 additions & 5 deletions authorizations/authorization-ranger/build.gradle.kts
Original file line number Diff line number Diff line change
Expand Up @@ -31,11 +31,17 @@ dependencies {
implementation(project(":core")) {
exclude(group = "*")
}

implementation(libs.bundles.log4j)
implementation(libs.commons.lang3)
implementation(libs.guava)

implementation(libs.javax.jaxb.api) {
exclude("*")
}
implementation(libs.javax.ws.rs.api)
implementation(libs.jettison)
compileOnly(libs.lombok)
implementation(libs.mail)
implementation(libs.ranger.intg) {
exclude("org.apache.hadoop", "hadoop-common")
exclude("org.apache.hive", "hive-storage-api")
Expand All @@ -50,11 +56,9 @@ dependencies {
exclude("org.apache.ranger", "ranger-plugin-classloader")
exclude("net.java.dev.jna")
exclude("javax.ws.rs")
exclude("org.eclipse.jetty")
}
implementation(libs.javax.ws.rs.api)
implementation(libs.javax.jaxb.api) {
exclude("*")
}
implementation(libs.rome)

testImplementation(project(":common"))
testImplementation(project(":clients:client-java"))
Expand All @@ -70,6 +74,7 @@ dependencies {
exclude("org.apache.lucene")
exclude("org.apache.solr")
exclude("org.apache.kafka")
exclude("org.eclipse.jetty")
exclude("org.elasticsearch")
exclude("org.elasticsearch.client")
exclude("org.elasticsearch.plugin")
Expand All @@ -78,6 +83,7 @@ dependencies {
}
testImplementation(libs.hive2.jdbc) {
exclude("org.slf4j")
exclude("org.eclipse.jetty.aggregate")
}
testImplementation(libs.mysql.driver)
}
Expand Down
36 changes: 32 additions & 4 deletions catalogs/catalog-hive/build.gradle.kts
Original file line number Diff line number Diff line change
Expand Up @@ -30,32 +30,53 @@ val icebergVersion: String = libs.versions.iceberg.get()
val scalaCollectionCompatVersion: String = libs.versions.scala.collection.compat.get()

dependencies {
implementation(project(":api"))
implementation(project(":catalogs:catalog-common"))
implementation(project(":core"))
implementation(project(":api")) {
exclude("*")
}
implementation(project(":catalogs:catalog-common")) {
exclude("*")
}
implementation(project(":core")) {
exclude("*")
}

implementation(libs.caffeine)
implementation(libs.commons.collections3)
implementation(libs.commons.configuration1)
implementation(libs.htrace.core4)
implementation(libs.guava)
implementation(libs.hadoop2.auth) {
exclude("*")
}
implementation(libs.hive2.exec) {
artifact {
classifier = "core"
}
exclude("com.google.code.findbugs", "jsr305")
exclude("com.google.protobuf")
exclude("org.apache.avro")
exclude("org.apache.ant")
exclude("org.apache.calcite")
exclude("org.apache.calcite.avatica")
exclude("org.apache.curator")
exclude("org.apache.derby")
exclude("org.apache.hadoop", "hadoop-yarn-server-resourcemanager")
exclude("org.apache.hive", "hive-llap-tez")
exclude("org.apache.hive", "hive-vector-code-gen")
exclude("org.apache.ivy")
exclude("org.apache.logging.log4j")
exclude("org.apache.zookeeper")
exclude("org.codehaus.groovy", "groovy-all")
exclude("org.datanucleus", "datanucleus-core")
exclude("org.eclipse.jetty.aggregate", "jetty-all")
exclude("org.eclipse.jetty.orbit", "javax.servlet")
exclude("org.openjdk.jol")
exclude("org.pentaho")
exclude("org.slf4j")
}
implementation(libs.woodstox.core)
implementation(libs.hive2.metastore) {
exclude("ant")
exclude("co.cask.tephra")
exclude("com.github.joshelser")
exclude("com.google.code.findbugs", "jsr305")
Expand All @@ -64,13 +85,16 @@ dependencies {
exclude("com.zaxxer", "HikariCP")
exclude("io.dropwizard.metricss")
exclude("javax.transaction", "transaction-api")
exclude("org.apache.ant")
exclude("org.apache.avro")
exclude("org.apache.curator")
exclude("org.apache.derby")
exclude("org.apache.hadoop", "hadoop-yarn-server-resourcemanager")
exclude("org.apache.hbase")
exclude("org.apache.logging.log4j")
exclude("org.apache.parquet", "parquet-hadoop-bundle")
exclude("org.apache.zookeeper")
exclude("org.datanucleus")
exclude("org.eclipse.jetty.aggregate", "jetty-all")
exclude("org.eclipse.jetty.orbit", "javax.servlet")
exclude("org.openjdk.jol")
Expand Down Expand Up @@ -135,7 +159,11 @@ tasks {

val copyCatalogLibs by registering(Copy::class) {
dependsOn("jar", "runtimeJars")
from("build/libs")
from("build/libs") {
exclude("guava-*.jar")
exclude("log4j-*.jar")
exclude("slf4j-*.jar")
}
into("$rootDir/distribution/package/catalogs/hive/libs")
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,6 @@
import java.util.stream.Collectors;
import java.util.stream.Stream;
import lombok.ToString;
import org.apache.commons.collections.CollectionUtils;
import org.apache.commons.lang3.ArrayUtils;
import org.apache.gravitino.catalog.hive.converter.HiveDataTypeConverter;
import org.apache.gravitino.connector.BaseTable;
Expand Down Expand Up @@ -87,7 +86,7 @@ public static HiveTable.Builder fromHiveTable(Table table) {

StorageDescriptor sd = table.getSd();
Distribution distribution = Distributions.NONE;
if (CollectionUtils.isNotEmpty(sd.getBucketCols())) {
if (sd.getBucketCols() != null && !sd.getBucketCols().isEmpty()) {
// Hive table use hash strategy as bucketing strategy
distribution =
Distributions.hash(
Expand All @@ -96,7 +95,7 @@ public static HiveTable.Builder fromHiveTable(Table table) {
}

SortOrder[] sortOrders = new SortOrder[0];
if (CollectionUtils.isNotEmpty(sd.getSortCols())) {
if (sd.getSortCols() != null && !sd.getSortCols().isEmpty()) {
sortOrders =
sd.getSortCols().stream()
.map(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,6 @@
import org.apache.hadoop.hive.metastore.api.StorageDescriptor;
import org.apache.hadoop.hive.metastore.api.Table;
import org.apache.hadoop.hive.metastore.api.UnknownTableException;
import org.apache.parquet.Strings;
import org.apache.thrift.TException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
Expand Down Expand Up @@ -163,19 +162,19 @@ public Partition addPartition(Partition partition) throws PartitionAlreadyExists
Preconditions.checkArgument(
transformFields.size() == identityPartition.fieldNames().length,
"Hive partition field names must be the same as table partitioning field names: %s, but got %s",
Strings.join(transformFields, ","),
Strings.join(
String.join(",", transformFields),
String.join(
",",
Arrays.stream(identityPartition.fieldNames())
.map(f -> Strings.join(f, "."))
.collect(Collectors.toList()),
","));
.map(f -> String.join(".", f))
.collect(Collectors.toList())));
Arrays.stream(identityPartition.fieldNames())
.forEach(
f ->
Preconditions.checkArgument(
transformFields.contains(f[0]),
"Hive partition field name must be in table partitioning field names: %s, but got %s",
Strings.join(transformFields, ","),
String.join(",", transformFields),
f[0]));

try {
Expand Down
23 changes: 15 additions & 8 deletions gradle/libs.versions.toml
Original file line number Diff line number Diff line change
Expand Up @@ -28,18 +28,19 @@ jetty = "9.4.51.v20230217"
jersey = "2.41"
mockito = "4.11.0"
airlift-json = "237"
airlift-log = "231"
airlift-resolver = "1.6"
airlift-units = "1.8"
hive2 = "2.3.9"
hadoop2 = "2.10.2"
hadoop3 = "3.1.0"
hadoop-minikdc = "3.3.6"
htrace-core4 = "4.1.0-incubating"
httpclient5 = "5.2.1"
mockserver = "5.15.0"
commons-lang3 = "3.14.0"
commons-io = "2.15.0"
commons-collections4 = "4.4"
commons-collections3 = "3.2.2"
commons-configuration1 = "1.6"
commons-dbcp2 = "2.11.0"
caffeine = "2.9.3"
rocksdbjni = "7.10.2"
Expand All @@ -62,7 +63,6 @@ jline = "3.21.0"
okhttp3 = "4.11.0"
metrics = "4.2.25"
prometheus = "0.16.0"
jsqlparser = "4.2"
mysql = "8.0.23"
postgresql = "42.6.0"
immutables-value = "2.10.0"
Expand Down Expand Up @@ -91,6 +91,10 @@ node-plugin = "7.0.1"
commons-cli = "1.2"
sun-activation-version = "1.2.0"
error-prone = "3.1.0"
woodstox-core = "5.3.0"
mail = "1.4.1"
rome = "1.0"
jettison = "1.1"

[libraries]
protobuf-java = { group = "com.google.protobuf", name = "protobuf-java", version.ref = "protoc" }
Expand Down Expand Up @@ -130,18 +134,17 @@ hive2-metastore = { group = "org.apache.hive", name = "hive-metastore", version.
hive2-exec = { group = "org.apache.hive", name = "hive-exec", version.ref = "hive2"}
hive2-common = { group = "org.apache.hive", name = "hive-common", version.ref = "hive2"}
hive2-jdbc = { group = "org.apache.hive", name = "hive-jdbc", version.ref = "hive2"}
hadoop2-auth = { group = "org.apache.hadoop", name = "hadoop-auth", version.ref = "hadoop2" }
hadoop2-hdfs = { group = "org.apache.hadoop", name = "hadoop-hdfs", version.ref = "hadoop2" }
hadoop2-common = { group = "org.apache.hadoop", name = "hadoop-common", version.ref = "hadoop2"}
hadoop2-mapreduce-client-core = { group = "org.apache.hadoop", name = "hadoop-mapreduce-client-core", version.ref = "hadoop2"}
hadoop3-hdfs = { group = "org.apache.hadoop", name = "hadoop-hdfs", version.ref = "hadoop3" }
hadoop3-common = { group = "org.apache.hadoop", name = "hadoop-common", version.ref = "hadoop3"}
hadoop3-client = { group = "org.apache.hadoop", name = "hadoop-client", version.ref = "hadoop3"}
hadoop3-mapreduce-client-core = { group = "org.apache.hadoop", name = "hadoop-mapreduce-client-core", version.ref = "hadoop3"}
hadoop3-minicluster = { group = "org.apache.hadoop", name = "hadoop-minicluster", version.ref = "hadoop-minikdc"}
htrace-core4 = { group = "org.apache.htrace", name = "htrace-core4", version.ref = "htrace-core4" }
airlift-json = { group = "io.airlift", name = "json", version.ref = "airlift-json"}
airlift-resolver = { group = "io.airlift.resolver", name = "resolver", version.ref = "airlift-resolver"}
airlift-units = { group = "io.airlift", name = "units", version.ref = "airlift-units"}
airlift-log = { group = "io.airlift", name = "log", version.ref = "airlift-log"}
httpclient5 = { group = "org.apache.httpcomponents.client5", name = "httpclient5", version.ref = "httpclient5" }
mockserver-netty = { group = "org.mock-server", name = "mockserver-netty", version.ref = "mockserver" }
mockserver-client-java = { group = "org.mock-server", name = "mockserver-client-java", version.ref = "mockserver" }
Expand All @@ -150,6 +153,8 @@ commons-io = { group = "commons-io", name = "commons-io", version.ref = "commons
caffeine = { group = "com.github.ben-manes.caffeine", name = "caffeine", version.ref = "caffeine" }
rocksdbjni = { group = "org.rocksdb", name = "rocksdbjni", version.ref = "rocksdbjni" }
commons-collections4 = { group = "org.apache.commons", name = "commons-collections4", version.ref = "commons-collections4" }
commons-collections3 = { group = "commons-collections", name = "commons-collections", version.ref = "commons-collections3" }
commons-configuration1 = { group = "commons-configuration", name = "commons-configuration", version.ref = "commons-configuration1" }
iceberg-aws = { group = "org.apache.iceberg", name = "iceberg-aws", version.ref = "iceberg" }
iceberg-core = { group = "org.apache.iceberg", name = "iceberg-core", version.ref = "iceberg" }
iceberg-api = { group = "org.apache.iceberg", name = "iceberg-api", version.ref = "iceberg" }
Expand All @@ -158,7 +163,6 @@ paimon-core = { group = "org.apache.paimon", name = "paimon-core", version.ref =
paimon-format = { group = "org.apache.paimon", name = "paimon-format", version.ref = "paimon" }
paimon-hive-catalog = { group = "org.apache.paimon", name = "paimon-hive-catalog", version.ref = "paimon" }
trino-spi= { group = "io.trino", name = "trino-spi", version.ref = "trino" }
trino-toolkit= { group = "io.trino", name = "trino-plugin-toolkit", version.ref = "trino" }
trino-testing= { group = "io.trino", name = "trino-testing", version.ref = "trino" }
trino-memory= { group = "io.trino", name = "trino-memory", version.ref = "trino" }
trino-cli= { group = "io.trino", name = "trino-cli", version.ref = "trino" }
Expand All @@ -183,7 +187,6 @@ metrics-servlets = { group = "io.dropwizard.metrics", name = "metrics-servlets",
prometheus-client = { group = "io.prometheus", name = "simpleclient", version.ref = "prometheus" }
prometheus-dropwizard = { group = "io.prometheus", name = "simpleclient_dropwizard", version.ref = "prometheus" }
prometheus-servlet = { group = "io.prometheus", name = "simpleclient_servlet", version.ref = "prometheus" }
jsqlparser = { group = "com.github.jsqlparser", name = "jsqlparser", version.ref = "jsqlparser" }
mysql-driver = { group = "mysql", name = "mysql-connector-java", version.ref = "mysql" }
postgresql-driver = { group = "org.postgresql", name = "postgresql", version.ref = "postgresql" }
minikdc = { group = "org.apache.hadoop", name = "hadoop-minikdc", version.ref = "hadoop-minikdc"}
Expand All @@ -194,6 +197,7 @@ kafka-clients = { group = "org.apache.kafka", name = "kafka-clients", version.re
kafka = { group = "org.apache.kafka", name = "kafka_2.12", version.ref = "kafka" }
curator-test = { group = "org.apache.curator", name = "curator-test", version.ref = "curator"}
cglib = { group = "cglib", name = "cglib", version.ref = "cglib"}
woodstox-core = { group = "com.fasterxml.woodstox", name = "woodstox-core", version.ref = "woodstox-core"}

ranger-intg = { group = "org.apache.ranger", name = "ranger-intg", version.ref = "ranger" }
javax-jaxb-api = { group = "javax.xml.bind", name = "jaxb-api", version.ref = "javax-jaxb-api" }
Expand All @@ -204,6 +208,9 @@ mybatis = { group = "org.mybatis", name = "mybatis", version.ref = "mybatis"}
h2db = { group = "com.h2database", name = "h2", version.ref = "h2db"}
awaitility = { group = "org.awaitility", name = "awaitility", version.ref = "awaitility" }
servlet = { group = "javax.servlet", name = "javax.servlet-api", version.ref = "servlet" }
mail = { group = "javax.mail", name = "mail", version.ref = "mail" }
rome = { group = "rome", name = "rome", version.ref = "rome" }
jettison = { group = "org.codehaus.jettison", name = "jettison", version.ref = "jettison" }

[bundles]
log4j = ["slf4j-api", "log4j-slf4j2-impl", "log4j-api", "log4j-core", "log4j-12-api"]
Expand Down

0 comments on commit c01edab

Please sign in to comment.