Skip to content

Commit

Permalink
[VL] Allow users to set bloom filter configurations
Browse files: browse the repository at this point in the history
  • Loading branch information
zhli1142015 committed Nov 3, 2023
1 parent 55f1480 commit 53f3617
Show file tree
Hide file tree
Showing 2 changed files with 13 additions and 1 deletion.
9 changes: 9 additions & 0 deletions cpp/velox/compute/WholeStageResultIterator.cc
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,9 @@ const std::string kAbandonPartialAggregationMinPct =
"spark.gluten.sql.columnar.backend.velox.abandonPartialAggregationMinPct";
const std::string kAbandonPartialAggregationMinRows =
"spark.gluten.sql.columnar.backend.velox.abandonPartialAggregationMinRows";
// Spark SQL configuration keys for the runtime bloom filter: the expected
// number of items, the number of bits, and the maximum number of bits.
const std::string kBloomFilterExpectedNumItems{
    "spark.sql.optimizer.runtime.bloomFilter.expectedNumItems"};
const std::string kBloomFilterNumBits{
    "spark.sql.optimizer.runtime.bloomFilter.numBits"};
const std::string kBloomFilterMaxNumBits{
    "spark.sql.optimizer.runtime.bloomFilter.maxNumBits"};

// metrics
const std::string kDynamicFiltersProduced = "dynamicFiltersProduced";
Expand Down Expand Up @@ -363,6 +366,12 @@ std::unordered_map<std::string, std::string> WholeStageResultIterator::getQueryC
configs[velox::core::QueryConfig::kSpillableReservationGrowthPct] =
getConfigValue(confMap_, kSpillableReservationGrowthPct, "25");
configs[velox::core::QueryConfig::kSpillCompressionKind] = getConfigValue(confMap_, kSpillCompressionKind, "lz4");
configs[velox::core::QueryConfig::kSparkBloomFilterExpectedNumItems] =
getConfigValue(confMap_, kBloomFilterExpectedNumItems, "1000000");
configs[velox::core::QueryConfig::kSparkBloomFilterNumBits] =
getConfigValue(confMap_, kBloomFilterNumBits, "8388608");
configs[velox::core::QueryConfig::kSparkBloomFilterMaxNumBits] =
getConfigValue(confMap_, kBloomFilterMaxNumBits, "4194304");
} catch (const std::invalid_argument& err) {
std::string errDetails = err.what();
throw std::runtime_error("Invalid conf arg: " + errDetails);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -422,7 +422,10 @@ object GlutenConfig {
SQLConf.SESSION_LOCAL_TIMEZONE.key,
GLUTEN_DEFAULT_SESSION_TIMEZONE_KEY,
SQLConf.LEGACY_SIZE_OF_NULL.key,
"spark.io.compression.codec"
"spark.io.compression.codec",
"spark.sql.optimizer.runtime.bloomFilter.expectedNumItems",
"spark.sql.optimizer.runtime.bloomFilter.numBits",
"spark.sql.optimizer.runtime.bloomFilter.maxNumBits"
)
keys.forEach(
k => {
Expand Down

0 comments on commit 53f3617

Please sign in to comment.