Skip to content

Commit

Permalink
[#478] feat(core, catalogs): Refactor catalog configurations (#489)
Browse files Browse the repository at this point in the history
### What changes were proposed in this pull request?

In this PR, we introduce the following points:
- Remove the catalog-related configuration from `Graviton` server
configuration file.
- Support setting catalog configuration with a separate config file
located in `catalogs` module
- Provide catalog-related configurations for a specific engine with an
unique prefix.

### Why are the changes needed?

Fix: #478 
Fix: #196 

### Does this PR introduce _any_ user-facing change?

N/A

### How was this patch tested?

New UTs
  • Loading branch information
yuqi1129 authored Oct 12, 2023
1 parent 40dc8f5 commit 4d9e7a9
Show file tree
Hide file tree
Showing 19 changed files with 343 additions and 118 deletions.
8 changes: 4 additions & 4 deletions build.gradle.kts
Original file line number Diff line number Diff line change
Expand Up @@ -155,7 +155,7 @@ tasks {
val outputDir = projectDir.dir("distribution")

val compileDistribution by registering {
dependsOn("copySubprojectDependencies", "copyCatalogLibs", "copySubprojectLib")
dependsOn("copySubprojectDependencies", "copyCatalogLibAndConfigs", "copySubprojectLib")

group = "graviton distribution"
outputs.dir(projectDir.dir("distribution/package"))
Expand Down Expand Up @@ -230,9 +230,9 @@ tasks {
}
}

val copyCatalogLibs by registering(Copy::class) {
dependsOn(":catalogs:catalog-hive:copyCatalogLibs",
":catalogs:catalog-lakehouse-iceberg:copyCatalogLibs")
val copyCatalogLibAndConfigs by registering(Copy::class) {
dependsOn(":catalogs:catalog-hive:copyLibAndConfig",
":catalogs:catalog-lakehouse-iceberg:copyLibAndConfig")
}

clean {
Expand Down
22 changes: 22 additions & 0 deletions catalogs/catalog-hive/build.gradle.kts
Original file line number Diff line number Diff line change
Expand Up @@ -90,4 +90,26 @@ tasks {
from("build/libs")
into("${rootDir}/distribution/package/catalogs/hive/libs")
}

val copyCatalogConfig by registering(Copy::class) {
from("src/main/resources")
into("${rootDir}/distribution/package/catalogs/hive/conf")

include("hive.conf")
include("hive-site.xml.template")

rename { original -> if (original.endsWith(".template")) {
original.replace(".template", "")
} else {
original
}}

exclude { details ->
details.file.isDirectory()
}
}

val copyLibAndConfig by registering(Copy::class) {
dependsOn(copyCatalogConfig, copyCatalogLibs)
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
*/
package com.datastrato.graviton.catalog.hive;

import static com.datastrato.graviton.catalog.BaseCatalog.CATALOG_BYPASS_PREFIX;
import static com.datastrato.graviton.catalog.hive.HiveTable.SUPPORT_TABLE_TYPES;
import static com.datastrato.graviton.catalog.hive.HiveTablePropertiesMetadata.COMMENT;
import static com.datastrato.graviton.catalog.hive.HiveTablePropertiesMetadata.TABLE_TYPE;
Expand Down Expand Up @@ -63,7 +64,7 @@ public class HiveCatalogOperations implements CatalogOperations, SupportsSchemas

@VisibleForTesting HiveClientPool clientPool;

private HiveConf hiveConf;
@VisibleForTesting HiveConf hiveConf;

private final CatalogEntity entity;

Expand All @@ -90,6 +91,15 @@ public void initialize(Map<String, String> conf) throws RuntimeException {
conf.forEach(hadoopConf::set);
hiveConf = new HiveConf(hadoopConf, HiveCatalogOperations.class);

// Overwrite hive conf with graviton conf if exists
conf.forEach(
(key, value) -> {
if (key.startsWith(CATALOG_BYPASS_PREFIX)) {
// Trim bypass prefix and pass it to hive conf
hiveConf.set(key.substring(CATALOG_BYPASS_PREFIX.length()), value);
}
});

// todo(xun): add hive client pool size in config
this.clientPool = new HiveClientPool(1, hiveConf);

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
<!--
~ Copyright 2023 Datastrato.
~ This software is licensed under the Apache License version 2.
-->
<configuration>
</configuration>

12 changes: 12 additions & 0 deletions catalogs/catalog-hive/src/main/resources/hive.conf
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
#
# Copyright 2023 Datastrato.
# This software is licensed under the Apache License version 2.
#

## This file holds common properties for hive catalog and metastore, for more, please refer to
## `org.apache.hadoop.hive.conf.HiveConf`

## If we want to specify Hive catalog-related configuration like 'hive.metastore.client.capability.check', we can do it like this:
## graviton.bypass.hive.metastore.client.capability.check = true, and 'graviton.bypass' is the prefix that
## the configuration will be directly by pass to backend engine, and 'hive.metastore.client.capability.check' is the configuration key.
graviton.bypass.hive.metastore.client.capability.check = false
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
/*
* Copyright 2023 Datastrato.
* This software is licensed under the Apache License version 2.
*/

package com.datastrato.graviton.catalog.hive;

import static com.datastrato.graviton.catalog.BaseCatalog.CATALOG_BYPASS_PREFIX;

import com.google.common.collect.Maps;
import java.util.Map;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Test;

class TestHiveCatalogOperations {
@Test
void testInitialize() {
Map<String, String> properties = Maps.newHashMap();
HiveCatalogOperations hiveCatalogOperations = new HiveCatalogOperations(null);
hiveCatalogOperations.initialize(properties);
String v = hiveCatalogOperations.hiveConf.get("mapreduce.job.reduces");
Assertions.assertEquals("10", v);

// Test If we can override the value in hive-site.xml
properties.put(CATALOG_BYPASS_PREFIX + "mapreduce.job.reduces", "20");
hiveCatalogOperations.initialize(properties);
v = hiveCatalogOperations.hiveConf.get("mapreduce.job.reduces");
Assertions.assertEquals("20", v);

// Test If user properties can override the value in hive-site.xml
properties.clear();
properties.put("mapreduce.job.reduces", "30");
hiveCatalogOperations.initialize(properties);
v = hiveCatalogOperations.hiveConf.get("mapreduce.job.reduces");
Assertions.assertEquals("30", v);
}
}
16 changes: 16 additions & 0 deletions catalogs/catalog-hive/src/test/resources/hive-site.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
<!--
~ Copyright 2023 Datastrato.
~ This software is licensed under the Apache License version 2.
-->
<configuration>
<property>
<name>hive.metastore.client.capability.check</name>
<value>true</value>
</property>

<property>
<name>mapreduce.job.reduces</name>
<value>10</value>
</property>
</configuration>

23 changes: 23 additions & 0 deletions catalogs/catalog-lakehouse-iceberg/build.gradle.kts
Original file line number Diff line number Diff line change
Expand Up @@ -90,4 +90,27 @@ tasks {
from("build/libs")
into("${rootDir}/distribution/package/catalogs/lakehouse-iceberg/libs")
}

val copyCatalogConfig by registering(Copy::class) {
from("src/main/resources")
into("${rootDir}/distribution/package/catalogs/lakehouse-iceberg/conf")

include("lakehouse-iceberg.conf")
include("hive-site.xml.template")
include("hdfs-site.xml.template")

rename { original -> if (original.endsWith(".template")) {
original.replace(".template", "")
} else {
original
}}

exclude { details ->
details.file.isDirectory()
}
}

val copyLibAndConfig by registering(Copy::class) {
dependsOn(copyCatalogLibs, copyCatalogConfig)
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
<!--
~ Copyright 2023 Datastrato.
~ This software is licensed under the Apache License version 2.
-->
<configuration>
</configuration>
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
<!--
~ Copyright 2023 Datastrato.
~ This software is licensed under the Apache License version 2.
-->
<configuration>
</configuration>

Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
#
# Copyright 2023 Datastrato.
# This software is licensed under the Apache License version 2.
#

## This file holds common configurations for Lakehouse-iceberg catalog. The format of the key is
## 'graviton.bypass.{iceberg-inner-config-key}' and `iceberg-inner-config-key` is the
## real key that pass to Lakehouse-iceberg catalog.
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
import com.google.common.base.Preconditions;
import com.google.common.base.Splitter;
import com.google.common.collect.Iterables;
import com.google.common.collect.Lists;
import com.google.common.collect.Streams;
import java.nio.file.Files;
import java.nio.file.Path;
Expand Down Expand Up @@ -79,8 +80,8 @@ public GravitonAuxiliaryService loadAuxService(
}

@VisibleForTesting
public IsolatedClassLoader getIsolatedClassLoader(String classPath) {
return IsolatedClassLoader.buildClassLoader(classPath);
public IsolatedClassLoader getIsolatedClassLoader(List<String> classPaths) {
return IsolatedClassLoader.buildClassLoader(classPaths);
}

@VisibleForTesting
Expand Down Expand Up @@ -115,7 +116,7 @@ private void registerAuxService(String auxServiceName, Map<String, String> confi
classPath = getValidPath(auxServiceName, classPath);
LOG.info("AuxService name:{}, config:{}, classpath:{}", auxServiceName, config, classPath);

IsolatedClassLoader isolatedClassLoader = getIsolatedClassLoader(classPath);
IsolatedClassLoader isolatedClassLoader = getIsolatedClassLoader(Lists.newArrayList(classPath));
try {
GravitonAuxiliaryService gravitonAuxiliaryService =
loadAuxService(auxServiceName, isolatedClassLoader);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@
import com.google.common.base.Preconditions;
import com.google.common.collect.Maps;
import java.util.Map;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
* The abstract base class for Catalog implementations.
Expand All @@ -27,13 +29,22 @@
public abstract class BaseCatalog<T extends BaseCatalog>
implements Catalog, CatalogProvider, HasPropertyMetadata {

private static final Logger LOG = LoggerFactory.getLogger(BaseCatalog.class);

private CatalogEntity entity;

private Map<String, String> conf;

private volatile CatalogOperations ops;

private volatile Map<String, String> properties;

// Any Graviton configuration that starts with this prefix will be trim and passed to the specific
// catalog implementation. For example, if the configuration is
// "graviton.bypass.hive.metastore.uris",
// then we will trim the prefix and pass "hive.metastore.uris" to the hive client configurations.
public static final String CATALOG_BYPASS_PREFIX = "graviton.bypass.";

/**
* Creates a new instance of CatalogOperations.
*
Expand Down
Loading

0 comments on commit 4d9e7a9

Please sign in to comment.