From d5351e1a2f051181d8723473b77637465ff56717 Mon Sep 17 00:00:00 2001 From: Qi Yu Date: Mon, 16 Oct 2023 21:04:12 +0800 Subject: [PATCH] [#509] Improvement(hive): Graviton properties are prioritized over configurations that will be passed to specific engines (#510) ### What changes were proposed in this pull request? Make sure a Graviton configuration like `a.b` will overwrite the configuration (`graviton.bypass.a.b`) that will be bypassed to `Hive` ### Why are the changes needed? The Graviton configuration has a higher priority, so it should overwrite the bypass configurations. Fix: #509 ### Does this PR introduce _any_ user-facing change? N/A ### How was this patch tested? UT --- .../catalog/hive/HiveCatalogOperations.java | 16 ++++++++++---- .../hive/TestHiveCatalogOperations.java | 21 +++++++++++++++++++ .../graviton/catalog/hive/TestHiveSchema.java | 16 ++++++++++++++ .../graviton/catalog/hive/TestHiveTable.java | 17 +++++++++++++++ 4 files changed, 66 insertions(+), 4 deletions(-) diff --git a/catalogs/catalog-hive/src/main/java/com/datastrato/graviton/catalog/hive/HiveCatalogOperations.java b/catalogs/catalog-hive/src/main/java/com/datastrato/graviton/catalog/hive/HiveCatalogOperations.java index 422561e94b..aab608937a 100644 --- a/catalogs/catalog-hive/src/main/java/com/datastrato/graviton/catalog/hive/HiveCatalogOperations.java +++ b/catalogs/catalog-hive/src/main/java/com/datastrato/graviton/catalog/hive/HiveCatalogOperations.java @@ -103,21 +103,29 @@ public void initialize(Map conf) throws RuntimeException { this.tablePropertiesMetadata = new HiveTablePropertiesMetadata(); this.catalogPropertiesMetadata = new HiveCatalogPropertiesMeta(); + // Key format like graviton.bypass.a.b Map byPassConfig = Maps.newHashMap(); + // Hold keys that lie in GRAVITON_CONFIG_TO_HIVE + Map gravitonConfig = Maps.newHashMap(); + conf.forEach( (key, value) -> { if (key.startsWith(CATALOG_BYPASS_PREFIX)) { // Trim bypass prefix and pass it to hive conf 
byPassConfig.put(key.substring(CATALOG_BYPASS_PREFIX.length()), value); } else if (GRAVITON_CONFIG_TO_HIVE.containsKey(key)) { - byPassConfig.put(GRAVITON_CONFIG_TO_HIVE.get(key), value); - } else { - byPassConfig.put(key, value); + gravitonConfig.put(GRAVITON_CONFIG_TO_HIVE.get(key), value); } }); + Map mergeConfig = Maps.newHashMap(byPassConfig); + // `gravitonConfig` overwrite byPassConfig if possible + mergeConfig.putAll(gravitonConfig); + Configuration hadoopConf = new Configuration(); - byPassConfig.forEach(hadoopConf::set); + // Set byPass first to make graviton config overwrite it, only keys in byPassConfig + // and gravitonConfig will be passed to Hive config, and gravitonConfig has higher priority + mergeConfig.forEach(hadoopConf::set); hiveConf = new HiveConf(hadoopConf, HiveCatalogOperations.class); this.clientPool = new HiveClientPool(getClientPoolSize(conf), hiveConf); diff --git a/catalogs/catalog-hive/src/test/java/com/datastrato/graviton/catalog/hive/TestHiveCatalogOperations.java b/catalogs/catalog-hive/src/test/java/com/datastrato/graviton/catalog/hive/TestHiveCatalogOperations.java index e6c84fb520..876a93cf10 100644 --- a/catalogs/catalog-hive/src/test/java/com/datastrato/graviton/catalog/hive/TestHiveCatalogOperations.java +++ b/catalogs/catalog-hive/src/test/java/com/datastrato/graviton/catalog/hive/TestHiveCatalogOperations.java @@ -13,6 +13,7 @@ import com.datastrato.graviton.catalog.PropertyEntry; import com.google.common.collect.Maps; import java.util.Map; +import org.apache.hadoop.hive.conf.HiveConf.ConfVars; import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Test; @@ -69,4 +70,24 @@ void testPropertyMeta() { Assertions.assertFalse(propertyEntryMap.get(Catalog.PROPERTY_PACKAGE).isRequired()); Assertions.assertFalse(propertyEntryMap.get(CLIENT_POOL_SIZE).isRequired()); } + + @Test + void testPropertyOverwrite() { + Map maps = Maps.newHashMap(); + maps.put("a.b", "v1"); + maps.put(CATALOG_BYPASS_PREFIX + "a.b", "v2"); + 
+ maps.put("c.d", "v3"); + maps.put(CATALOG_BYPASS_PREFIX + "c.d", "v4"); + maps.put("e.f", "v5"); + + maps.put(METASTORE_URIS, "url1"); + maps.put(ConfVars.METASTOREURIS.varname, "url2"); + maps.put(CATALOG_BYPASS_PREFIX + ConfVars.METASTOREURIS.varname, "url3"); + HiveCatalogOperations op = new HiveCatalogOperations(null); + op.initialize(maps); + + Assertions.assertEquals("v2", op.hiveConf.get("a.b")); + Assertions.assertEquals("v4", op.hiveConf.get("c.d")); + } } diff --git a/catalogs/catalog-hive/src/test/java/com/datastrato/graviton/catalog/hive/TestHiveSchema.java b/catalogs/catalog-hive/src/test/java/com/datastrato/graviton/catalog/hive/TestHiveSchema.java index ef5d5e05ef..cc13441aa8 100644 --- a/catalogs/catalog-hive/src/test/java/com/datastrato/graviton/catalog/hive/TestHiveSchema.java +++ b/catalogs/catalog-hive/src/test/java/com/datastrato/graviton/catalog/hive/TestHiveSchema.java @@ -4,6 +4,9 @@ */ package com.datastrato.graviton.catalog.hive; +import static com.datastrato.graviton.catalog.BaseCatalog.CATALOG_BYPASS_PREFIX; +import static com.datastrato.graviton.catalog.hive.HiveCatalogPropertiesMeta.METASTORE_URIS; + import com.datastrato.graviton.NameIdentifier; import com.datastrato.graviton.Namespace; import com.datastrato.graviton.catalog.hive.miniHMS.MiniHiveMetastoreService; @@ -16,6 +19,7 @@ import java.time.Instant; import java.util.Arrays; import java.util.Map; +import org.apache.hadoop.hive.conf.HiveConf; import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Test; @@ -36,6 +40,18 @@ private HiveCatalog initHiveCatalog() { .build(); Map conf = Maps.newHashMap(); + conf.put(METASTORE_URIS, hiveConf.get(HiveConf.ConfVars.METASTOREURIS.varname)); + conf.put( + CATALOG_BYPASS_PREFIX + HiveConf.ConfVars.METASTOREWAREHOUSE.varname, + hiveConf.get(HiveConf.ConfVars.METASTOREWAREHOUSE.varname)); + conf.put( + CATALOG_BYPASS_PREFIX + + HiveConf.ConfVars.METASTORE_DISALLOW_INCOMPATIBLE_COL_TYPE_CHANGES.varname, + 
hiveConf.get(HiveConf.ConfVars.METASTORE_DISALLOW_INCOMPATIBLE_COL_TYPE_CHANGES.varname)); + + conf.put( + CATALOG_BYPASS_PREFIX + HiveConf.ConfVars.HIVE_IN_TEST.varname, + hiveConf.get(HiveConf.ConfVars.HIVE_IN_TEST.varname)); metastore.hiveConf().iterator().forEachRemaining(e -> conf.put(e.getKey(), e.getValue())); HiveCatalog hiveCatalog = new HiveCatalog().withCatalogConf(conf).withCatalogEntity(entity); diff --git a/catalogs/catalog-hive/src/test/java/com/datastrato/graviton/catalog/hive/TestHiveTable.java b/catalogs/catalog-hive/src/test/java/com/datastrato/graviton/catalog/hive/TestHiveTable.java index 402e0e1c5b..bcdcfd4a9f 100644 --- a/catalogs/catalog-hive/src/test/java/com/datastrato/graviton/catalog/hive/TestHiveTable.java +++ b/catalogs/catalog-hive/src/test/java/com/datastrato/graviton/catalog/hive/TestHiveTable.java @@ -4,6 +4,8 @@ */ package com.datastrato.graviton.catalog.hive; +import static com.datastrato.graviton.catalog.BaseCatalog.CATALOG_BYPASS_PREFIX; +import static com.datastrato.graviton.catalog.hive.HiveCatalogPropertiesMeta.METASTORE_URIS; import static com.datastrato.graviton.rel.transforms.Transforms.day; import static com.datastrato.graviton.rel.transforms.Transforms.identity; @@ -30,6 +32,7 @@ import java.time.Instant; import java.util.Arrays; import java.util.Map; +import org.apache.hadoop.hive.conf.HiveConf; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.BeforeAll; @@ -84,6 +87,20 @@ private static void initHiveCatalog() { Map conf = Maps.newHashMap(); metastore.hiveConf().forEach(e -> conf.put(e.getKey(), e.getValue())); + + conf.put(METASTORE_URIS, hiveConf.get(HiveConf.ConfVars.METASTOREURIS.varname)); + conf.put( + CATALOG_BYPASS_PREFIX + HiveConf.ConfVars.METASTOREWAREHOUSE.varname, + hiveConf.get(HiveConf.ConfVars.METASTOREWAREHOUSE.varname)); + conf.put( + CATALOG_BYPASS_PREFIX + + HiveConf.ConfVars.METASTORE_DISALLOW_INCOMPATIBLE_COL_TYPE_CHANGES.varname, + 
hiveConf.get(HiveConf.ConfVars.METASTORE_DISALLOW_INCOMPATIBLE_COL_TYPE_CHANGES.varname)); + + conf.put( + CATALOG_BYPASS_PREFIX + HiveConf.ConfVars.HIVE_IN_TEST.varname, + hiveConf.get(HiveConf.ConfVars.HIVE_IN_TEST.varname)); + hiveCatalog = new HiveCatalog().withCatalogConf(conf).withCatalogEntity(entity); }