Skip to content

Commit

Permalink
add conf spark.gluten.sql.columnar.backend.velox.bloomFilter.maxNumBits
Browse files Browse the repository at this point in the history
  • Loading branch information
zhli1142015 committed Nov 3, 2023
1 parent 53f3617 commit e4078e2
Show file tree
Hide file tree
Showing 4 changed files with 26 additions and 6 deletions.
2 changes: 1 addition & 1 deletion cpp/velox/compute/WholeStageResultIterator.cc
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ const std::string kAbandonPartialAggregationMinRows =
"spark.gluten.sql.columnar.backend.velox.abandonPartialAggregationMinRows";
const std::string kBloomFilterExpectedNumItems = "spark.sql.optimizer.runtime.bloomFilter.expectedNumItems";
const std::string kBloomFilterNumBits = "spark.sql.optimizer.runtime.bloomFilter.numBits";
const std::string kBloomFilterMaxNumBits = "spark.sql.optimizer.runtime.bloomFilter.maxNumBits";
const std::string kBloomFilterMaxNumBits = "spark.gluten.sql.columnar.backend.velox.bloomFilter.maxNumBits";

// metrics
const std::string kDynamicFiltersProduced = "dynamicFiltersProduced";
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@
*/
package org.apache.spark.sql

import io.glutenproject.GlutenConfig

import org.apache.spark.sql.internal.SQLConf

class GlutenBloomFilterAggregateQuerySuite
Expand All @@ -25,9 +27,12 @@ class GlutenBloomFilterAggregateQuerySuite

test("Test bloom_filter_agg with big RUNTIME_BLOOM_FILTER_MAX_NUM_ITEMS") {
val table = "bloom_filter_test"
withSQLConf(SQLConf.RUNTIME_BLOOM_FILTER_MAX_NUM_ITEMS.key -> "5000000") {
withSQLConf(
SQLConf.RUNTIME_BLOOM_FILTER_MAX_NUM_ITEMS.key -> "5000000",
GlutenConfig.COLUMNAR_VELOX_BLOOM_FILTER_MAX_NUM_BITS.key -> "4194304"
) {
val numEstimatedItems = 5000000L
val numBits = SQLConf.get.getConf(SQLConf.RUNTIME_BLOOM_FILTER_MAX_NUM_BITS)
val numBits = GlutenConfig.getConf.veloxBloomFilterMaxNumBits
val sqlString = s"""
|SELECT every(might_contain(
| (SELECT bloom_filter_agg(col,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@
*/
package org.apache.spark.sql

import io.glutenproject.GlutenConfig

import org.apache.spark.sql.internal.SQLConf

class GlutenBloomFilterAggregateQuerySuite
Expand All @@ -25,9 +27,12 @@ class GlutenBloomFilterAggregateQuerySuite

test("Test bloom_filter_agg with big RUNTIME_BLOOM_FILTER_MAX_NUM_ITEMS") {
val table = "bloom_filter_test"
withSQLConf(SQLConf.RUNTIME_BLOOM_FILTER_MAX_NUM_ITEMS.key -> "5000000") {
withSQLConf(
SQLConf.RUNTIME_BLOOM_FILTER_MAX_NUM_ITEMS.key -> "5000000",
GlutenConfig.COLUMNAR_VELOX_BLOOM_FILTER_MAX_NUM_BITS.key -> "4194304"
) {
val numEstimatedItems = 5000000L
val numBits = SQLConf.get.getConf(SQLConf.RUNTIME_BLOOM_FILTER_MAX_NUM_BITS)
val numBits = GlutenConfig.getConf.veloxBloomFilterMaxNumBits
val sqlString = s"""
|SELECT every(might_contain(
| (SELECT bloom_filter_agg(col,
Expand Down
12 changes: 11 additions & 1 deletion shims/common/src/main/scala/io/glutenproject/GlutenConfig.scala
Original file line number Diff line number Diff line change
Expand Up @@ -226,6 +226,8 @@ class GlutenConfig(conf: SQLConf) extends Logging {

/** Reads the `COLUMNAR_VELOX_SPILL_FILE_SYSTEM` entry from the wrapped `SQLConf` as a String. */
def veloxSpillFileSystem: String = conf.getConf(COLUMNAR_VELOX_SPILL_FILE_SYSTEM)

/**
 * Reads the `COLUMNAR_VELOX_BLOOM_FILTER_MAX_NUM_BITS` entry
 * (`spark.gluten.sql.columnar.backend.velox.bloomFilter.maxNumBits`) from the wrapped `SQLConf`.
 *
 * @return the configured maximum number of bits for the Velox bloom filter, as a Long
 */
def veloxBloomFilterMaxNumBits: Long = conf.getConf(COLUMNAR_VELOX_BLOOM_FILTER_MAX_NUM_BITS)

/** Reads the `COLUMNAR_CH_SHUFFLE_PREFER_SPILL_ENABLED` entry from the wrapped `SQLConf` as a Boolean. */
def chColumnarShufflePreferSpill: Boolean = conf.getConf(COLUMNAR_CH_SHUFFLE_PREFER_SPILL_ENABLED)

/** Reads the `COLUMNAR_CH_SHUFFLE_SPILL_THRESHOLD` entry from the wrapped `SQLConf` as a Long. */
def chColumnarShuffleSpillThreshold: Long = conf.getConf(COLUMNAR_CH_SHUFFLE_SPILL_THRESHOLD)
Expand Down Expand Up @@ -425,7 +427,7 @@ object GlutenConfig {
"spark.io.compression.codec",
"spark.sql.optimizer.runtime.bloomFilter.expectedNumItems",
"spark.sql.optimizer.runtime.bloomFilter.numBits",
"spark.sql.optimizer.runtime.bloomFilter.maxNumBits"
COLUMNAR_VELOX_BLOOM_FILTER_MAX_NUM_BITS.key
)
keys.forEach(
k => {
Expand Down Expand Up @@ -1270,4 +1272,12 @@ object GlutenConfig {
+ "partial aggregation may be early abandoned.")
.intConf
.createOptional

// Gluten-specific cap on the number of bits used by the Velox bloom filter.
// - Key: spark.gluten.sql.columnar.backend.velox.bloomFilter.maxNumBits
// - Marked internal(); typed as a Long.
// - Default: 4194304 (= 4 * 1024 * 1024 bits).
// The doc string names 'spark.bloom_filter.max_num_bits'; presumably that is the
// Velox-side option this value is mapped onto -- TODO confirm against the native code.
// NOTE(review): no range validation is applied here; zero or negative values are
// accepted by this definition as written.
val COLUMNAR_VELOX_BLOOM_FILTER_MAX_NUM_BITS =
buildConf("spark.gluten.sql.columnar.backend.velox.bloomFilter.maxNumBits")
.internal()
.doc("The max number of bits to use for the velox bloom filter: " +
"'spark.bloom_filter.max_num_bits'")
.longConf
.createWithDefault(4194304L)
}

0 comments on commit e4078e2

Please sign in to comment.